package tools

// AviaNZMeta is the metadata element in a .data file
type AviaNZMeta struct {
    Operator string  `json:"Operator"`
    Reviewer *string `json:"Reviewer,omitempty"`
    Duration float64 `json:"Duration"`
}

// AviaNZLabel represents a species label in a segment
type AviaNZLabel struct {
    Species   string `json:"species"`
    Certainty int    `json:"certainty"`
    Filter    string `json:"filter"`
}

// AviaNZSegment represents a detection segment [start, end, freq_low, freq_high, labels]
type AviaNZSegment [5]any
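// exampleDecodeAviaNZSegment is an illustrative sketch, not part of the
// original file: it shows how the positional [5]any layout documented on
// AviaNZSegment can be unpacked after JSON decoding, where encoding/json
// leaves untyped numbers as float64. The function name is hypothetical.
func exampleDecodeAviaNZSegment(seg AviaNZSegment) (start, end, freqLow, freqHigh float64, labels []any) {
    start, _ = seg[0].(float64)
    end, _ = seg[1].(float64)
    freqLow, _ = seg[2].(float64)
    freqHigh, _ = seg[3].(float64)
    labels, _ = seg[4].([]any) // each element carries AviaNZLabel fields
    return
}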
package tools

import (
    "fmt"
    "os"
    "path/filepath"
    "sort"
    "sync/atomic"
)

// parallelResult is the common interface for birda/raven worker results.
type parallelResult interface {
    filePath() string
    getCalls() []ClusteredCall
    wasWritten() bool
    wasSkipped() bool
    getError() error
}

// aggregateStats holds the collected results from a parallel fan-out/fan-in.
type aggregateStats struct {
    calls            []ClusteredCall
    speciesCount     map[string]int
    dataFilesWritten int
    dataFilesSkipped int
    filesProcessed   int
    filesDeleted     int
    firstErr         error
}

// aggregateResults collects results from a channel of parallelResult values,
// handling error tracking, species counting, optional file deletion, and
// progress reporting. Returns the aggregated stats.
func aggregateResults(
    results <-chan parallelResult,
    total int,
    processed *atomic.Int32,
    deleteFiles bool,
    progressHandler func(int, int, string),
) aggregateStats {
    var stats aggregateStats
    stats.speciesCount = make(map[string]int)
    for result := range results {
        if err := result.getError(); err != nil && stats.firstErr == nil {
            stats.firstErr = err
        }
        if result.wasWritten() {
            stats.dataFilesWritten++
        }
        if result.wasSkipped() {
            stats.dataFilesSkipped++
        }
        for _, call := range result.getCalls() {
            stats.calls = append(stats.calls, call)
            stats.speciesCount[call.EbirdCode]++
        }
        stats.filesProcessed++
        stats.maybeDeleteFile(deleteFiles, result)
        if progressHandler != nil {
            current := int(processed.Add(1))
            progressHandler(current, total, filepath.Base(result.filePath()))
        }
    }
    return stats
}

// maybeDeleteFile deletes the source file if requested and it was successfully processed.
func (s *aggregateStats) maybeDeleteFile(deleteFiles bool, result parallelResult) {
    if !deleteFiles || !result.wasWritten() {
        return
    }
    if err := os.Remove(result.filePath()); err != nil {
        if s.firstErr == nil {
            s.firstErr = fmt.Errorf("failed to delete %s: %w", result.filePath(), err)
        }
    } else {
        s.filesDeleted++
    }
}

// sortCallsByFileAndTime sorts calls by filename, then start time.
func sortCallsByFileAndTime(calls []ClusteredCall) {
    sort.Slice(calls, func(i, j int) bool {
        if calls[i].File != calls[j].File {
            return calls[i].File < calls[j].File
        }
        return calls[i].StartTime < calls[j].StartTime
    })
}
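// exampleAggregateUsage is an illustrative sketch, not part of the original
// file: it shows the fan-in contract aggregateResults expects — a results
// channel that will be closed by the producer, a total, and a shared atomic
// counter. callsFromSourceParallel is the production caller; the nil
// progress handler here simply disables progress reporting.
func exampleAggregateUsage(results <-chan parallelResult, total int) (aggregateStats, error) {
    var processed atomic.Int32
    stats := aggregateResults(results, total, &processed, false, nil)
    return stats, stats.firstErr
}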
package tools

import (
    "fmt"
    "os"
    "path/filepath"
    "sort"
    "sync"
    "sync/atomic"
)

// CallsFromSourceInput defines the common input for calls-from-source tools
type CallsFromSourceInput struct {
    Folder          string          `json:"folder"`
    File            string          `json:"file"`
    Delete          bool            `json:"delete"`
    ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
}

// CallsFromSourceOutput defines the common output for calls-from-source tools
type CallsFromSourceOutput struct {
    Calls            []ClusteredCall `json:"calls"`
    TotalCalls       int             `json:"total_calls"`
    SpeciesCount     map[string]int  `json:"species_count"`
    DataFilesWritten int             `json:"data_files_written"`
    DataFilesSkipped int             `json:"data_files_skipped"`
    FilesProcessed   int             `json:"files_processed"`
    FilesDeleted     int             `json:"files_deleted"`
    Filter           string          `json:"filter"`
    Error            *string         `json:"error,omitempty"`
}

// CallSource abstracts a source of bird call data (Raven, BirdNET, etc.)
type CallSource interface {
    // Name returns the display name (e.g. "Raven", "BirdNET")
    Name() string
    // FindFiles discovers source files in the given folder
    FindFiles(folder string) ([]string, error)
    // ProcessFile processes a single source file and returns calls, write/skip status
    ProcessFile(path string, cache *DirCache) (calls []ClusteredCall, written, skipped bool, err error)
}

// callsFromSource is the shared entry point for all call source tools.
func callsFromSource(src CallSource, input CallsFromSourceInput) (CallsFromSourceOutput, error) {
    var output CallsFromSourceOutput
    output.Filter = src.Name()

    // Collect source files to process
    var files []string
    if input.File != "" {
        files = []string{input.File}
    } else if input.Folder != "" {
        var err error
        files, err = src.FindFiles(input.Folder)
        if err != nil {
            errMsg := fmt.Sprintf("Failed to find %s files: %v", src.Name(), err)
            output.Error = &errMsg
            return output, fmt.Errorf("%s", errMsg)
        }
    } else {
        errMsg := "Either --folder or --file must be specified"
        output.Error = &errMsg
        return output, fmt.Errorf("%s", errMsg)
    }
    if len(files) == 0 {
        errMsg := fmt.Sprintf("No %s files found", src.Name())
        output.Error = &errMsg
        return output, fmt.Errorf("%s", errMsg)
    }

    // Single file or small batch: process sequentially (avoid goroutine overhead)
    if len(files) < 10 {
        return callsFromSourceSequential(src, input, files)
    }
    // Large batch: parallel processing with DirCache
    return callsFromSourceParallel(src, input, files)
}

// callsFromSourceSequential processes source files one at a time (for small batches)
func callsFromSourceSequential(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
    var output CallsFromSourceOutput
    output.Filter = src.Name()

    // Build DirCache once for the folder
    dirCaches := make(map[string]*DirCache)
    if input.Folder != "" {
        dirCaches[input.Folder] = NewDirCache(input.Folder)
    }

    speciesCount := make(map[string]int)
    var allCalls []ClusteredCall
    dataFilesWritten := 0
    dataFilesSkipped := 0
    filesProcessed := 0
    filesDeleted := 0

    for _, file := range files {
        dir := filepath.Dir(file)
        cache := dirCaches[dir]
        if cache == nil {
            cache = NewDirCache(dir)
            dirCaches[dir] = cache
        }

        calls, written, skipped, err := src.ProcessFile(file, cache)
        if err != nil {
            errMsg := fmt.Sprintf("Error processing %s: %v", file, err)
            output.Error = &errMsg
            return output, fmt.Errorf("%s", errMsg)
        }
        if written {
            dataFilesWritten++
        }
        if skipped {
            dataFilesSkipped++
        }
        for _, call := range calls {
            allCalls = append(allCalls, call)
            speciesCount[call.EbirdCode]++
        }
        filesProcessed++

        // Delete if requested and successfully processed
        if input.Delete && written {
            if err := os.Remove(file); err != nil {
                errMsg := fmt.Sprintf("Failed to delete %s: %v", file, err)
                output.Error = &errMsg
                return output, fmt.Errorf("%s", errMsg)
            }
            filesDeleted++
        }

        if input.ProgressHandler != nil {
            input.ProgressHandler(filesProcessed, len(files), filepath.Base(file))
        }
    }

    // Sort all calls by file, then start time
    sort.Slice(allCalls, func(i, j int) bool {
        if allCalls[i].File != allCalls[j].File {
            return allCalls[i].File < allCalls[j].File
        }
        return allCalls[i].StartTime < allCalls[j].StartTime
    })

    output.Calls = allCalls
    output.TotalCalls = len(allCalls)
    output.SpeciesCount = speciesCount
    output.DataFilesWritten = dataFilesWritten
    output.DataFilesSkipped = dataFilesSkipped
    output.FilesProcessed = filesProcessed
    output.FilesDeleted = filesDeleted
    return output, nil
}

// sourceJob represents a single file to process (generic over CallSource)
type sourceJob struct {
    filePath string
}

// sourceResult represents the result of processing a single source file
type sourceResult struct {
    path    string
    calls   []ClusteredCall
    written bool
    skipped bool
    err     error
}

func (r sourceResult) filePath() string          { return r.path }
func (r sourceResult) getCalls() []ClusteredCall { return r.calls }
func (r sourceResult) wasWritten() bool          { return r.written }
func (r sourceResult) wasSkipped() bool          { return r.skipped }
func (r sourceResult) getError() error           { return r.err }

// callsFromSourceParallel processes source files concurrently using a worker pool and DirCache
func callsFromSourceParallel(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
    var output CallsFromSourceOutput
    output.Filter = src.Name()

    total := len(files)
    var processed atomic.Int32

    // Build DirCache for the folder
    dirCaches := &sync.Map{}
    if input.Folder != "" {
        cache := NewDirCache(input.Folder)
        dirCaches.Store(input.Folder, cache)
    }

    // Create job and result channels
    jobs := make(chan sourceJob, total)
    results := make(chan parallelResult, total)

    // Start workers
    var wg sync.WaitGroup
    for range DOT_DATA_WORKERS {
        wg.Add(1)
        go sourceWorker(src, dirCaches, jobs, results, &wg)
    }

    // Send jobs
    for _, file := range files {
        jobs <- sourceJob{filePath: file}
    }
    close(jobs)

    // Wait for workers to finish, then close results
    go func() {
        wg.Wait()
        close(results)
    }()

    // Collect results with progress reporting
    stats := aggregateResults(results, total, &processed, input.Delete, input.ProgressHandler)
    if stats.firstErr != nil {
        errMsg := stats.firstErr.Error()
        output.Error = &errMsg
        return output, stats.firstErr
    }

    sortCallsByFileAndTime(stats.calls)

    output.Calls = stats.calls
    output.TotalCalls = len(stats.calls)
    output.SpeciesCount = stats.speciesCount
    output.DataFilesWritten = stats.dataFilesWritten
    output.DataFilesSkipped = stats.dataFilesSkipped
    output.FilesProcessed = stats.filesProcessed
    output.FilesDeleted = stats.filesDeleted
    return output, nil
}

// sourceWorker processes source files from the jobs channel
func sourceWorker(src CallSource, dirCaches *sync.Map, jobs <-chan sourceJob, results chan<- parallelResult, wg *sync.WaitGroup) {
    defer wg.Done()
    for job := range jobs {
        dir := filepath.Dir(job.filePath)
        // Get or create DirCache for this directory
        var cache *DirCache
        if cached, ok := dirCaches.Load(dir); ok {
            cache = cached.(*DirCache)
        } else {
            cache = NewDirCache(dir)
            dirCaches.Store(dir, cache)
        }
        calls, written, skipped, err := src.ProcessFile(job.filePath, cache)
        results <- sourceResult{
            path:    job.filePath,
            calls:   calls,
            written: written,
            skipped: skipped,
            err:     err,
        }
    }
}
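// stubCSVSource is an illustrative sketch, not part of the original file: a
// minimal CallSource implementation showing the contract the worker pool
// relies on. The type name and the idea of a CSV-backed source are
// hypothetical; a real source would parse detections in ProcessFile.
type stubCSVSource struct{}

func (stubCSVSource) Name() string { return "CSV" }

func (stubCSVSource) FindFiles(folder string) ([]string, error) {
    // Glob for hypothetical .csv detection files in the folder.
    return filepath.Glob(filepath.Join(folder, "*.csv"))
}

func (stubCSVSource) ProcessFile(path string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
    // Stub: report the file as skipped with no calls.
    return nil, false, true, nil
}

// A value of stubCSVSource could then be passed straight to callsFromSource,
// which routes it through the sequential or parallel path by batch size.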
package tools

import (
    "fmt"
    "strings"
    "time"

    "github.com/sixdouglas/suncalc"

    "skraak/utils"
)

// IsNightInput defines the input parameters for the isnight tool
type IsNightInput struct {
    FilePath string  `json:"file_path"`
    Lat      float64 `json:"lat"`
    Lng      float64 `json:"lng"`
    Timezone string  `json:"timezone,omitempty"`
}

// IsNightOutput defines the output structure for the isnight tool
type IsNightOutput struct {
    FilePath      string  `json:"file_path"`
    TimestampUTC  string  `json:"timestamp_utc"`
    SolarNight    bool    `json:"solar_night"`
    CivilNight    bool    `json:"civil_night"`
    DiurnalActive bool    `json:"diurnal_active"`
    MoonPhase     float64 `json:"moon_phase"`
    DurationSec   float64 `json:"duration_seconds"`
    TimestampSrc  string  `json:"timestamp_source"`
    MidpointUTC   string  `json:"midpoint_utc"`
    SunriseUTC    string  `json:"sunrise_utc,omitempty"`
    SunsetUTC     string  `json:"sunset_utc,omitempty"`
    DawnUTC       string  `json:"dawn_utc,omitempty"`
    DuskUTC       string  `json:"dusk_utc,omitempty"`
}

// IsNight determines if a WAV file was recorded at night based on its
// metadata timestamp and the given GPS coordinates.
//
// Timestamp resolution order:
//  1. AudioMoth comment (timezone embedded)
//  2. Filename timestamp + timezone offset (requires --timezone)
//  3. File modification time (system local time)
func IsNight(input IsNightInput) (IsNightOutput, error) {
    var output IsNightOutput

    // Step 1: Parse WAV header
    metadata, err := utils.ParseWAVHeader(input.FilePath)
    if err != nil {
        return output, fmt.Errorf("WAV header parsing failed: %w", err)
    }
    output.DurationSec = metadata.Duration

    // Step 2: Resolve timestamp (use file mod time as fallback)
    tsResult, err := utils.ResolveTimestamp(metadata, input.FilePath, input.Timezone, true, nil)
    if err != nil {
        return output, fmt.Errorf("cannot determine recording timestamp: %w", err)
    }

    // Determine timestamp source label
    tsSource := "file_mod_time"
    if tsResult.IsAudioMoth {
        tsSource = "audiomoth_comment"
    } else if utils.HasTimestampFilename(input.FilePath) {
        tsSource = "filename"
    }

    // Step 3: Calculate astronomical data using recording midpoint
    astroData := utils.CalculateAstronomicalData(
        tsResult.Timestamp.UTC(),
        metadata.Duration,
        input.Lat,
        input.Lng,
    )

    // Step 4: Get sun event times for informational output
    midpoint := utils.CalculateMidpointTime(tsResult.Timestamp.UTC(), metadata.Duration)
    sunTimes := suncalc.GetTimes(midpoint, input.Lat, input.Lng)

    output.FilePath = input.FilePath
    output.TimestampUTC = tsResult.Timestamp.UTC().Format(time.RFC3339)
    output.SolarNight = astroData.SolarNight
    output.CivilNight = astroData.CivilNight
    output.MoonPhase = astroData.MoonPhase
    output.TimestampSrc = tsSource
    output.MidpointUTC = midpoint.Format(time.RFC3339)

    populateSunTimes(&output, sunTimes, midpoint)
    return output, nil
}

// String returns a human-readable summary of the isnight result
func (o IsNightOutput) String() string {
    var sb strings.Builder
    fmt.Fprintf(&sb, "File: %s\n", o.FilePath)
    fmt.Fprintf(&sb, "Timestamp (UTC): %s\n", o.TimestampUTC)
    fmt.Fprintf(&sb, "Midpoint (UTC): %s\n", o.MidpointUTC)
    fmt.Fprintf(&sb, "Duration: %.1f seconds\n", o.DurationSec)
    fmt.Fprintf(&sb, "Source: %s\n", o.TimestampSrc)
    fmt.Fprintf(&sb, "Solar night: %v\n", o.SolarNight)
    fmt.Fprintf(&sb, "Civil night: %v\n", o.CivilNight)
    fmt.Fprintf(&sb, "Moon phase: %.2f\n", o.MoonPhase)
    if o.SunriseUTC != "" {
        fmt.Fprintf(&sb, "Sunrise (UTC): %s\n", o.SunriseUTC)
    }
    if o.SunsetUTC != "" {
        fmt.Fprintf(&sb, "Sunset (UTC): %s\n", o.SunsetUTC)
    }
    if o.DawnUTC != "" {
        fmt.Fprintf(&sb, "Dawn (UTC): %s\n", o.DawnUTC)
    }
    if o.DuskUTC != "" {
        fmt.Fprintf(&sb, "Dusk (UTC): %s\n", o.DuskUTC)
    }
    return sb.String()
}

// populateSunTimes fills in sun event times and diurnal status from suncalc results.
func populateSunTimes(output *IsNightOutput, sunTimes map[suncalc.DayTimeName]suncalc.DayTime, midpoint time.Time) {
    // Diurnal: midpoint is between dawn and sunset
    if dawn, ok := sunTimes[suncalc.Dawn]; ok && !dawn.Value.IsZero() {
        if sunset, ok := sunTimes[suncalc.Sunset]; ok && !sunset.Value.IsZero() {
            output.DiurnalActive = !midpoint.Before(dawn.Value) && !midpoint.After(sunset.Value)
        }
    }
    output.SunriseUTC = sunTimeUTC(sunTimes, suncalc.Sunrise)
    output.SunsetUTC = sunTimeUTC(sunTimes, suncalc.Sunset)
    output.DawnUTC = sunTimeUTC(sunTimes, suncalc.Dawn)
    output.DuskUTC = sunTimeUTC(sunTimes, suncalc.Dusk)
}

// sunTimeUTC returns the UTC RFC3339 string for a suncalc event, or "" if absent/zero.
func sunTimeUTC(sunTimes map[suncalc.DayTimeName]suncalc.DayTime, name suncalc.DayTimeName) string {
    if entry, ok := sunTimes[name]; ok && !entry.Value.IsZero() {
        return entry.Value.UTC().Format(time.RFC3339)
    }
    return ""
}
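// exampleIsNightUsage is an illustrative sketch, not part of the original
// file; the path and coordinates are hypothetical placeholders.
func exampleIsNightUsage() error {
    out, err := IsNight(IsNightInput{
        FilePath: "/data/20240101_031500.wav", // hypothetical recording
        Lat:      -45.9,                       // hypothetical site coordinates
        Lng:      170.5,
    })
    if err != nil {
        return err
    }
    fmt.Print(out.String())
    return nil
}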
package tools

import (
    "context"
    "database/sql"
    "fmt"
    "io/fs"
    "os"
    "path/filepath"
    "strings"
    "time"

    "skraak/db"
    "skraak/utils"
)

// ImportUnstructuredInput defines the input parameters for importing files into an unstructured dataset
type ImportUnstructuredInput struct {
    DBPath     string `json:"db_path"`
    DatasetID  string `json:"dataset_id"`
    FolderPath string `json:"folder_path"`
    Recursive  *bool  `json:"recursive,omitempty"`
}

// ImportUnstructuredOutput defines the output structure
type ImportUnstructuredOutput struct {
    TotalFiles     int                     `json:"total_files"`
    ImportedFiles  int                     `json:"imported_files"`
    SkippedFiles   int                     `json:"skipped_files"` // Duplicates
    FailedFiles    int                     `json:"failed_files"`
    TotalDuration  float64                 `json:"total_duration_seconds"`
    ProcessingTime string                  `json:"processing_time"`
    Errors         []utils.FileImportError `json:"errors,omitempty"`
}

// ImportUnstructured imports WAV files into an unstructured dataset.
// Files are stored with minimal metadata: hash, duration, sample_rate, file_mod_time as timestamp.
// No location/cluster hierarchy, no astronomical data, no AudioMoth parsing.
func ImportUnstructured(
    ctx context.Context,
    input ImportUnstructuredInput,
) (ImportUnstructuredOutput, error) {
    startTime := time.Now()
    var output ImportUnstructuredOutput

    // Default recursive to true
    recursive := true
    if input.Recursive != nil {
        recursive = *input.Recursive
    }

    // Validate input
    if err := validateUnstructuredInput(input); err != nil {
        return output, fmt.Errorf("validation failed: %w", err)
    }

    // Scan for WAV files (no DB needed)
    files, scanErrors := scanWavFiles(input.FolderPath, recursive)
    output.Errors = append(output.Errors, scanErrors...)
    output.TotalFiles = len(files)
    if len(files) == 0 {
        output.ProcessingTime = time.Since(startTime).String()
        return output, nil
    }

    // Process each file inside a single logged write transaction
    err := db.WithWriteTx(ctx, resolveDBPath(input.DBPath), "import_unstructured", func(database *sql.DB, tx *db.LoggedTx) error {
        for _, filePath := range files {
            fileResult, procErr := processUnstructuredFile(tx, filePath, input.DatasetID)
            if procErr != nil {
                output.FailedFiles++
                output.Errors = append(output.Errors, utils.FileImportError{
                    FileName: filepath.Base(filePath),
                    Error:    procErr.Error(),
                    Stage:    utils.StageProcess,
                })
                continue
            }
            if fileResult.Skipped {
                output.SkippedFiles++
            } else {
                output.ImportedFiles++
                output.TotalDuration += fileResult.Duration
            }
        }
        return nil
    })
    if err != nil {
        return output, err
    }

    output.ProcessingTime = time.Since(startTime).String()
    return output, nil
}

// unstructuredFileResult holds the result of processing a single file
type unstructuredFileResult struct {
    Skipped  bool    // True if duplicate
    Duration float64 // Duration in seconds
}

// processUnstructuredFile processes a single WAV file for unstructured import
func processUnstructuredFile(tx *db.LoggedTx, filePath, datasetID string) (*unstructuredFileResult, error) {
    result := &unstructuredFileResult{}

    // Step 1: Parse WAV header
    metadata, err := utils.ParseWAVHeader(filePath)
    if err != nil {
        return nil, fmt.Errorf("WAV header parsing failed: %w", err)
    }

    // Step 2: Calculate hash
    hash, err := utils.ComputeXXH64(filePath)
    if err != nil {
        return nil, fmt.Errorf("hash calculation failed: %w", err)
    }

    // Step 3: Check for duplicate - if exists, skip entirely (do not link to dataset)
    _, isDuplicate, err := utils.CheckDuplicateHash(tx, hash)
    if err != nil {
        return nil, fmt.Errorf("duplicate check failed: %w", err)
    }
    if isDuplicate {
        // File already exists in database - skip completely, do not link to dataset
        result.Skipped = true
        result.Duration = metadata.Duration
        return result, nil
    }

    // Step 4: Generate file ID
    fileID, err := utils.GenerateLongID()
    if err != nil {
        return nil, fmt.Errorf("ID generation failed: %w", err)
    }

    // Step 5: Use file modification time as timestamp (no timezone conversion)
    timestamp := metadata.FileModTime

    // Step 6: Insert into file table
    _, err = tx.Exec(`
        INSERT INTO file (
            id, file_name, xxh64_hash, location_id, cluster_id,
            timestamp_local, duration, sample_rate,
            maybe_solar_night, maybe_civil_night, moon_phase,
            active
        ) VALUES (?, ?, ?, NULL, NULL, ?, ?, ?, NULL, NULL, NULL, TRUE)`,
        fileID,
        filepath.Base(filePath),
        hash,
        timestamp,
        metadata.Duration,
        metadata.SampleRate,
    )
    if err != nil {
        return nil, fmt.Errorf("file insert failed: %w", err)
    }

    // Step 7: Insert into file_dataset table
    _, err = tx.Exec(
        "INSERT INTO file_dataset (file_id, dataset_id) VALUES (?, ?)",
        fileID, datasetID,
    )
    if err != nil {
        return nil, fmt.Errorf("file_dataset insert failed: %w", err)
    }

    result.Duration = metadata.Duration
    return result, nil
}

// validateUnstructuredInput validates the input parameters
func validateUnstructuredInput(input ImportUnstructuredInput) error {
    // Validate dataset ID format
    if err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {
        return err
    }
    // Verify folder exists
    info, err := os.Stat(input.FolderPath)
    if err != nil {
        return fmt.Errorf("folder not accessible: %w", err)
    }
    if !info.IsDir() {
        return fmt.Errorf("path is not a directory: %s", input.FolderPath)
    }
    return db.WithReadDB(resolveDBPath(input.DBPath), func(database *sql.DB) error {
        // Verify dataset exists and is active
        if _, err := db.DatasetExistsAndActive(database, input.DatasetID); err != nil {
            return err
        }
        // Verify dataset is 'unstructured' type
        if err := db.ValidateDatasetTypeUnstructured(database, input.DatasetID); err != nil {
            return err
        }
        return nil
    })
}

// scanWavFiles scans a folder for WAV files
func scanWavFiles(folderPath string, recursive bool) ([]string, []utils.FileImportError) {
    var files []string
    var errors []utils.FileImportError

    walkFunc := func(path string, d fs.DirEntry, err error) error {
        if err != nil {
            errors = append(errors, utils.FileImportError{
                FileName: path,
                Error:    err.Error(),
                Stage:    utils.StageScan,
            })
            return nil
        }
        // Skip directories if not recursive
        if d.IsDir() {
            if !recursive && path != folderPath {
                return fs.SkipDir
            }
            return nil
        }
        // Check for .wav extension (case-insensitive)
        if strings.HasSuffix(strings.ToLower(d.Name()), ".wav") {
            files = append(files, path)
        }
        return nil
    }

    if recursive {
        if err := filepath.WalkDir(folderPath, walkFunc); err != nil {
            errors = append(errors, utils.FileImportError{
                FileName: folderPath,
                Error:    err.Error(),
                Stage:    utils.StageScan,
            })
        }
    } else {
        // Non-recursive: only scan top-level
        entries, err := os.ReadDir(folderPath)
        if err != nil {
            errors = append(errors, utils.FileImportError{
                FileName: folderPath,
                Error:    err.Error(),
                Stage:    utils.StageScan,
            })
            return nil, errors
        }
        for _, entry := range entries {
            if !entry.IsDir() && strings.HasSuffix(strings.ToLower(entry.Name()), ".wav") {
                files = append(files, filepath.Join(folderPath, entry.Name()))
            }
        }
    }

    return files, errors
}
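// exampleImportUnstructured is an illustrative sketch, not part of the
// original file; the DB path, dataset ID, and folder are hypothetical.
func exampleImportUnstructured(ctx context.Context) error {
    out, err := ImportUnstructured(ctx, ImportUnstructuredInput{
        DBPath:     "skraak.db",    // hypothetical
        DatasetID:  "abc123def456", // hypothetical 12-char short ID
        FolderPath: "/recordings",  // hypothetical
        // Recursive left nil: defaults to true
    })
    if err != nil {
        return err
    }
    fmt.Printf("imported %d file(s), skipped %d duplicate(s)\n", out.ImportedFiles, out.SkippedFiles)
    return nil
}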
package tools

import (
    "testing"

    "skraak/utils"
)

func TestValidateSegmentImportInput(t *testing.T) {
    t.Run("invalid dataset ID - too short", func(t *testing.T) {
        input := ImportSegmentsInput{
            DatasetID: "abc",
        }
        err := validateSegmentImportInput(input)
        if err == nil {
            t.Fatal("expected error for short dataset ID")
        }
    })

    t.Run("invalid dataset ID - too long", func(t *testing.T) {
        input := ImportSegmentsInput{
            DatasetID: "abc123def456ghi789",
        }
        err := validateSegmentImportInput(input)
        if err == nil {
            t.Fatal("expected error for long dataset ID")
        }
    })

    t.Run("invalid dataset ID - invalid characters", func(t *testing.T) {
        input := ImportSegmentsInput{
            DatasetID: "abc123!!!456",
        }
        err := validateSegmentImportInput(input)
        if err == nil {
            t.Fatal("expected error for invalid characters in dataset ID")
        }
    })

    t.Run("invalid location ID", func(t *testing.T) {
        input := ImportSegmentsInput{
            DatasetID:  "abc123def456",
            LocationID: "invalid",
        }
        err := validateSegmentImportInput(input)
        if err == nil {
            t.Fatal("expected error for invalid location ID")
        }
    })

    t.Run("invalid cluster ID", func(t *testing.T) {
        input := ImportSegmentsInput{
            DatasetID:  "abc123def456",
            LocationID: "xyz789uvw012",
            ClusterID:  "invalid",
        }
        err := validateSegmentImportInput(input)
        if err == nil {
            t.Fatal("expected error for invalid cluster ID")
        }
    })
}

func TestCountTotalSegments(t *testing.T) {
    t.Run("empty", func(t *testing.T) {
        count := countTotalSegments(map[string]scannedDataFile{})
        if count != 0 {
            t.Errorf("expected 0, got %d", count)
        }
    })

    t.Run("single file - no segments", func(t *testing.T) {
        files := map[string]scannedDataFile{
            "file1": {Segments: []*utils.Segment{}},
        }
        count := countTotalSegments(files)
        if count != 0 {
            t.Errorf("expected 0, got %d", count)
        }
    })

    t.Run("single file - multiple segments", func(t *testing.T) {
        files := map[string]scannedDataFile{
            "file1": {Segments: []*utils.Segment{{}, {}, {}}},
        }
        count := countTotalSegments(files)
        if count != 3 {
            t.Errorf("expected 3, got %d", count)
        }
    })

    t.Run("multiple files", func(t *testing.T) {
        files := map[string]scannedDataFile{
            "file1": {Segments: []*utils.Segment{{}, {}}},
            "file2": {Segments: []*utils.Segment{{}}},
            "file3": {Segments: []*utils.Segment{{}, {}, {}, {}}},
        }
        count := countTotalSegments(files)
        if count != 7 {
            t.Errorf("expected 7, got %d", count)
        }
    })
}
package tools

import (
    "context"
    "database/sql"
    "fmt"
    "os"
    "path/filepath"
    "strings"
    "time"

    "skraak/db"
    "skraak/utils"
)

// ImportSegmentsInput defines the input parameters for the import_segments tool
type ImportSegmentsInput struct {
    DBPath          string `json:"db_path"`
    Folder          string `json:"folder"`
    Mapping         string `json:"mapping"`
    DatasetID       string `json:"dataset_id"`
    LocationID      string `json:"location_id"`
    ClusterID       string `json:"cluster_id"`
    ProgressHandler func(processed, total int, message string)
}

// ImportSegmentsOutput defines the output structure for the import_segments tool
type ImportSegmentsOutput struct {
    Summary  ImportSegmentsSummary `json:"summary"`
    Segments []SegmentImport       `json:"segments"`
    Errors   []ImportSegmentError  `json:"errors,omitempty"`
}

// ImportSegmentsSummary provides summary statistics for the import operation
type ImportSegmentsSummary struct {
    DataFilesFound     int   `json:"data_files_found"`
    DataFilesProcessed int   `json:"data_files_processed"`
    TotalSegments      int   `json:"total_segments"`
    ImportedSegments   int   `json:"imported_segments"`
    ImportedLabels     int   `json:"imported_labels"`
    ImportedSubtypes   int   `json:"imported_subtypes"`
    ProcessingTimeMs   int64 `json:"processing_time_ms"`
}

// SegmentImport represents an imported segment in the output
type SegmentImport struct {
    SegmentID string        `json:"segment_id"`
    FileName  string        `json:"file_name"`
    StartTime float64       `json:"start_time"`
    EndTime   float64       `json:"end_time"`
    FreqLow   float64       `json:"freq_low"`
    FreqHigh  float64       `json:"freq_high"`
    Labels    []LabelImport `json:"labels"`
}

// LabelImport represents an imported label in the output
type LabelImport struct {
    LabelID   string `json:"label_id"`
    Species   string `json:"species"`
    CallType  string `json:"calltype,omitempty"`
    Filter    string `json:"filter"`
    Certainty int    `json:"certainty"`
    Comment   string `json:"comment,omitempty"`
}

// ImportSegmentError records errors encountered during segment import
type ImportSegmentError struct {
    File    string            `json:"file,omitempty"`
    Stage   utils.ImportStage `json:"stage"`
    Message string            `json:"message"`
}

// scannedDataFile holds parsed data for a .data file
type scannedDataFile struct {
    DataPath string
    WavPath  string
    WavHash  string
    FileID   string
    Duration float64
    Segments []*utils.Segment
}

// ImportSegments imports segments from AviaNZ .data files into the database
func ImportSegments(ctx context.Context, input ImportSegmentsInput) (ImportSegmentsOutput, error) {
    startTime := time.Now()
    var output ImportSegmentsOutput
    output.Segments = make([]SegmentImport, 0)
    output.Errors = make([]ImportSegmentError, 0)

    // Phase A: Input Validation
    if err := validateSegmentImportInput(input); err != nil {
        return output, err
    }

    // Load mapping file
    mapping, err := utils.LoadMappingFile(input.Mapping)
    if err != nil {
        return output, fmt.Errorf("failed to load mapping file: %w", err)
    }

    // Find .data files
    dataFiles, err := utils.FindDataFiles(input.Folder)
    if err != nil {
        return output, fmt.Errorf("failed to find .data files: %w", err)
    }
    output.Summary.DataFilesFound = len(dataFiles)
    if len(dataFiles) == 0 {
        return output, fmt.Errorf("no .data files found in folder: %s", input.Folder)
    }

    // Phase B+C: Parse data files and validate against DB
    database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
    if err != nil {
        return output, fmt.Errorf("failed to open database: %w", err)
    }
    defer database.Close()

    val, valErrors, err := validateAndPrepareSegments(database, input, mapping, dataFiles)
    output.Errors = append(output.Errors, valErrors...)
    if err != nil {
        return output, err
    }
    if val == nil || len(val.fileIDMap) == 0 {
        output.Summary.ProcessingTimeMs = time.Since(startTime).Milliseconds()
        return output, nil
    }

    // Phase D: Transactional Import
    importedSegments, importedLabels, importedSubtypes, fileUpdates, importErrors := importSegmentsIntoDB(
        ctx, database, val.fileIDMap, val.scannedFiles, mapping,
        val.filterIDMap, val.speciesIDMap, val.calltypeIDMap,
        input.DatasetID, input.ProgressHandler,
    )
    output.Errors = append(output.Errors, importErrors...)
    output.Segments = append(output.Segments, importedSegments...)

    // Phase E: Write IDs back to .data files
    if len(fileUpdates) > 0 {
        writeErrors := writeIDsToDataFiles(fileUpdates)
        output.Errors = append(output.Errors, writeErrors...)
    }

    output.Summary.DataFilesProcessed = len(val.fileIDMap)
    output.Summary.TotalSegments = countTotalSegments(val.fileIDMap)
    output.Summary.ImportedSegments = len(importedSegments)
    output.Summary.ImportedLabels = importedLabels
    output.Summary.ImportedSubtypes = importedSubtypes
    output.Summary.ProcessingTimeMs = time.Since(startTime).Milliseconds()
    return output, nil
}

// validateSegmentImportInput validates input parameters
func validateSegmentImportInput(input ImportSegmentsInput) error {
    // Validate folder exists
    if info, err := os.Stat(input.Folder); err != nil {
        return fmt.Errorf("folder does not exist: %s", input.Folder)
    } else if !info.IsDir() {
        return fmt.Errorf("path is not a folder: %s", input.Folder)
    }
    // Validate mapping file exists
    if _, err := os.Stat(input.Mapping); err != nil {
        return fmt.Errorf("mapping file does not exist: %s", input.Mapping)
    }
    // Validate IDs
    if err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {
        return err
    }
    if err := utils.ValidateShortID(input.LocationID, "location_id"); err != nil {
        return err
    }
    if err := utils.ValidateShortID(input.ClusterID, "cluster_id"); err != nil {
        return err
    }
    return nil
}

// validateSegmentHierarchy validates dataset/location/cluster relationships
func validateSegmentHierarchy(dbConn *sql.DB, datasetID, locationID, clusterID string) error {
    // Validate dataset exists and is structured
    if err := db.ValidateDatasetTypeForImport(dbConn, datasetID); err != nil {
        return err
    }
    // Validate location belongs to dataset
    if err := db.ValidateLocationBelongsToDataset(dbConn, locationID, datasetID); err != nil {
        return err
    }
    // Validate cluster belongs to location
    if err := db.ClusterBelongsToLocation(dbConn, clusterID, locationID); err != nil {
        return err
    }
    return nil
}

// scanAllDataFiles parses all .data files and collects unique values
func scanAllDataFiles(dataFiles []string, folder string) (
    []scannedDataFile,
    []ImportSegmentError,
    map[string]bool,
    map[string]bool,
    map[string]map[string]bool,
) {
    var scanned []scannedDataFile
    var errors []ImportSegmentError
    uniqueFilters := make(map[string]bool)
    uniqueSpecies := make(map[string]bool)
    uniqueCalltypes := make(map[string]map[string]bool) // species -> calltype -> true

    for _, dataPath := range dataFiles {
        // Find corresponding WAV file
        wavPath := strings.TrimSuffix(dataPath, ".data")
        if _, err := os.Stat(wavPath); err != nil {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(dataPath),
                Stage:   utils.StageValidation,
                Message: fmt.Sprintf("corresponding WAV file not found: %s", filepath.Base(wavPath)),
            })
            continue
        }

        // Parse .data file
        df, err := utils.ParseDataFile(dataPath)
        if err != nil {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(dataPath),
                Stage:   utils.StageValidation,
                Message: fmt.Sprintf("failed to parse .data file: %v", err),
            })
            continue
        }

        // Collect unique filters, species, calltypes
        for _, seg := range df.Segments {
            for _, label := range seg.Labels {
                uniqueFilters[label.Filter] = true
                uniqueSpecies[label.Species] = true
                if label.CallType != "" {
                    if uniqueCalltypes[label.Species] == nil {
                        uniqueCalltypes[label.Species] = make(map[string]bool)
                    }
                    uniqueCalltypes[label.Species][label.CallType] = true
                }
            }
        }

        scanned = append(scanned, scannedDataFile{
            DataPath: dataPath,
            WavPath:  wavPath,
            Duration: df.Meta.Duration,
            Segments: df.Segments,
        })
    }

    return scanned, errors, uniqueFilters, uniqueSpecies, uniqueCalltypes
}

// validateFiltersExist checks all filters exist in DB and returns ID map
func validateFiltersExist(dbConn *sql.DB, filterNames map[string]bool) (map[string]string, error) {
    filterIDMap := make(map[string]string)
    if len(filterNames) == 0 {
        return filterIDMap, nil
    }

    names := make([]string, 0, len(filterNames))
    for name := range filterNames {
        names = append(names, name)
    }
    query := `SELECT id, name FROM filter WHERE name IN (` + db.Placeholders(len(names)) + `) AND active = true`
    args := make([]any, len(names))
    for i, name := range names {
        args[i] = name
    }
    rows, err := dbConn.Query(query, args...)
    if err != nil {
        return nil, fmt.Errorf("failed to query filters: %w", err)
    }
    defer rows.Close()
    for rows.Next() {
        var id, name string
        if err := rows.Scan(&id, &name); err == nil {
            filterIDMap[name] = id
        }
    }

    // Check for missing filters
    var missing []string
    for name := range filterNames {
        if _, exists := filterIDMap[name]; !exists {
            missing = append(missing, name)
        }
    }
    if len(missing) > 0 {
        return nil, fmt.Errorf("filters not found in database: [%s]", strings.Join(missing, ", "))
    }
    return filterIDMap, nil
}

// loadSpeciesCalltypeIDs loads species and calltype ID maps
func loadSpeciesCalltypeIDs(
    dbConn *sql.DB,
    mapping utils.MappingFile,
    uniqueSpecies map[string]bool,
    uniqueCalltypes map[string]map[string]bool,
) (map[string]string, map[string]map[string]string, error) {
    speciesIDMap := make(map[string]string)
    calltypeIDMap := make(map[string]map[string]string) // (dbSpecies, dbCalltype) -> calltype_id

    // Collect all DB species labels from mapping
    dbSpeciesSet := make(map[string]bool)
    for dataSpecies := range uniqueSpecies {
        if dbSpecies, ok := mapping.GetDBSpecies(dataSpecies); ok {
            dbSpeciesSet[dbSpecies] = true
        }
    }

    // Load species IDs
    if len(dbSpeciesSet) > 0 {
        dbSpeciesList := make([]string, 0, len(dbSpeciesSet))
        for s := range dbSpeciesSet {
            dbSpeciesList = append(dbSpeciesList, s)
        }
        query := `SELECT id, label FROM species WHERE label IN (` + db.Placeholders(len(dbSpeciesList)) + `) AND active = true`
        args := make([]any, len(dbSpeciesList))
        for i, s := range dbSpeciesList {
            args[i] = s
        }
        rows, err := dbConn.Query(query, args...)
        if err != nil {
            return nil, nil, fmt.Errorf("failed to query species: %w", err)
        }
        defer rows.Close()
        for rows.Next() {
            var id, label string
            if err := rows.Scan(&id, &label); err == nil {
                speciesIDMap[label] = id
            }
        }
    }

    // Load calltype IDs
    for dataSpecies, ctSet := range uniqueCalltypes {
        dbSpecies, ok := mapping.GetDBSpecies(dataSpecies)
        if !ok {
            continue
        }
        if calltypeIDMap[dbSpecies] == nil {
            calltypeIDMap[dbSpecies] = make(map[string]string)
        }
        for dataCalltype := range ctSet {
            dbCalltype := mapping.GetDBCalltype(dataSpecies, dataCalltype)
            // Query calltype ID
            var calltypeID string
            err := dbConn.QueryRow(`
                SELECT ct.id
                FROM call_type ct
                JOIN species s ON ct.species_id = s.id
                WHERE s.label = ? AND ct.label = ? AND ct.active = true`,
                dbSpecies, dbCalltype).Scan(&calltypeID)
            if err == nil {
                calltypeIDMap[dbSpecies][dbCalltype] = calltypeID
            }
        }
    }

    return speciesIDMap, calltypeIDMap, nil
}

// validateAndMapFiles validates files exist by hash, are linked to dataset, and have no existing labels
func validateAndMapFiles(
    dbConn *sql.DB,
    scannedFiles []scannedDataFile,
    clusterID string,
    datasetID string,
) (map[string]scannedDataFile, []ImportSegmentError) {
    fileIDMap := make(map[string]scannedDataFile)
    var errors []ImportSegmentError

    for _, sf := range scannedFiles {
        // Compute hash
        hash, err := utils.ComputeXXH64(sf.WavPath)
        if err != nil {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(sf.WavPath),
                Stage:   utils.StageHash,
                Message: fmt.Sprintf("failed to compute hash: %v", err),
            })
            continue
        }
        sf.WavHash = hash

        // Find file by hash in cluster
        var fileID string
        var duration float64
        err = dbConn.QueryRow(`
            SELECT id, duration FROM file
            WHERE xxh64_hash = ? AND cluster_id = ? AND active = true`,
            hash, clusterID).Scan(&fileID, &duration)
        if err == sql.ErrNoRows {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(sf.WavPath),
                Stage:   utils.StageValidation,
                Message: fmt.Sprintf("file hash not found in database for cluster (hash: %s)", hash),
            })
            continue
        }
        if err != nil {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(sf.WavPath),
                Stage:   utils.StageValidation,
                Message: fmt.Sprintf("failed to query file: %v", err),
            })
            continue
        }
        sf.FileID = fileID
        sf.Duration = duration

        // Verify file is linked to dataset via file_dataset junction table (composite FK)
        var fileLinkedToDataset bool
        err = dbConn.QueryRow(`
            SELECT EXISTS(SELECT 1 FROM file_dataset WHERE file_id = ? AND dataset_id = ?)`,
            fileID, datasetID).Scan(&fileLinkedToDataset)
        if err != nil {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(sf.WavPath),
                Stage:   utils.StageValidation,
                Message: fmt.Sprintf("failed to verify file-dataset link: %v", err),
            })
            continue
        }
        if !fileLinkedToDataset {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(sf.WavPath),
                Stage:   utils.StageValidation,
                Message: fmt.Sprintf("file exists in cluster but is not linked to dataset %s", datasetID),
            })
            continue
        }

        // Check no existing labels for this file
        var labelCount int
        err = dbConn.QueryRow(`
            SELECT COUNT(*) FROM label l
            JOIN segment s ON l.segment_id = s.id
            WHERE s.file_id = ? AND l.active = true`, fileID).Scan(&labelCount)
        if err != nil {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(sf.WavPath),
                Stage:   utils.StageValidation,
                Message: fmt.Sprintf("failed to check existing labels: %v", err),
            })
            continue
        }
        if labelCount > 0 {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(sf.WavPath),
                Stage:   utils.StageValidation,
                Message: fmt.Sprintf("file already has %d label(s) - fresh imports only", labelCount),
            })
            continue
        }

        fileIDMap[fileID] = sf
    }

    return fileIDMap, errors
}

// dataFileUpdate holds data to write back to .data file after import
type dataFileUpdate struct {
    DataPath string
    WavHash  string
    LabelIDs map[int]map[int]string // segmentIndex -> labelIndex -> labelID
}

// importSegmentsIntoDB performs the transactional import
func importSegmentsIntoDB(
    ctx context.Context,
    database *sql.DB,
    fileIDMap map[string]scannedDataFile,
    scannedFiles []scannedDataFile,
    mapping utils.MappingFile,
    filterIDMap map[string]string,
    speciesIDMap map[string]string,
    calltypeIDMap map[string]map[string]string,
    datasetID string,
    progressHandler func(processed, total int, message string),
) ([]SegmentImport, int, int, []dataFileUpdate, []ImportSegmentError) {
    var importedSegments []SegmentImport
    var errors []ImportSegmentError
    importedLabels := 0
    importedSubtypes := 0
    var fileUpdates []dataFileUpdate

    tx, err := db.BeginLoggedTx(ctx, database, "import_segments")
    if err != nil {
        errors = append(errors, ImportSegmentError{
            Stage:   utils.StageImport,
            Message: fmt.Sprintf("failed to begin transaction: %v", err),
        })
        return nil, 0, 0, nil, errors
    }
    defer tx.Rollback()

    totalFiles := len(fileIDMap)
    processedFiles := 0
    for _, sf := range fileIDMap {
        if sf.FileID == "" {
            continue
        }
        processedFiles++
        if progressHandler != nil {
            progressHandler(processedFiles, totalFiles, filepath.Base(sf.DataPath))
        }

        fileUpdate := dataFileUpdate{
            DataPath: sf.DataPath,
            WavHash:  sf.WavHash,
            LabelIDs: make(map[int]map[int]string),
        }

        for segIdx, seg := range sf.Segments {
            segImp, labelIDs, subtypes, segErrs := importSegment(ctx, tx, seg, segIdx, sf, datasetID, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
            errors = append(errors, segErrs...)
            importedSubtypes += subtypes
            if len(segImp.Labels) == 0 {
                // Delete orphaned segment (no labels succeeded)
                if _, err := tx.ExecContext(ctx, `DELETE FROM segment WHERE id = ?`, segImp.SegmentID); err != nil {
                    errors = append(errors, ImportSegmentError{
                        File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
                        Message: fmt.Sprintf("failed to delete orphaned segment: %v", err),
                    })
                }
            } else {
                importedSegments = append(importedSegments, segImp)
                importedLabels += len(labelIDs)
                fileUpdate.LabelIDs[segIdx] = labelIDs
            }
        }
        fileUpdates = append(fileUpdates, fileUpdate)
    }

    if err := tx.Commit(); err != nil {
        errors = append(errors, ImportSegmentError{
            Stage:   utils.StageImport,
            Message: fmt.Sprintf("failed to commit transaction: %v", err),
        })
        return nil, 0, 0, nil, errors
    }

    return importedSegments, importedLabels, importedSubtypes, fileUpdates, errors
}

// countTotalSegments counts total segments from validated files
func countTotalSegments(fileIDMap map[string]scannedDataFile) int {
    count := 0
    for _, sf := range fileIDMap {
        count += len(sf.Segments)
    }
    return count
}

// writeIDsToDataFiles writes skraak_hash and skraak_label_ids back to .data files
func writeIDsToDataFiles(fileUpdates []dataFileUpdate) []ImportSegmentError {
    var errors []ImportSegmentError
    for _, fu := range fileUpdates {
        // Parse the .data file
        df, err := utils.ParseDataFile(fu.DataPath)
        if err != nil {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(fu.DataPath),
                Stage:   utils.StageImport,
                Message: fmt.Sprintf("failed to re-parse .data file for writing: %v", err),
            })
            continue
        }

        // Write skraak_hash to metadata
        if df.Meta.Extra == nil {
            df.Meta.Extra = make(map[string]any)
        }
        df.Meta.Extra["skraak_hash"] = fu.WavHash

        // Write skraak_label_id to each label
        for segIdx, labelIDs := range fu.LabelIDs {
            if segIdx >= len(df.Segments) {
                continue
            }
            seg := df.Segments[segIdx]
            for labelIdx, labelID := range labelIDs {
                if labelIdx >= len(seg.Labels) {
                    continue
                }
                label := seg.Labels[labelIdx]
                if label.Extra == nil {
                    label.Extra = make(map[string]any)
                }
                label.Extra["skraak_label_id"] = labelID
            }
        }

        // Write the updated .data file
        if err := df.Write(fu.DataPath); err != nil {
            errors = append(errors, ImportSegmentError{
                File:    filepath.Base(fu.DataPath),
                Stage:   utils.StageImport,
                Message: fmt.Sprintf("failed to write updated .data file: %v", err),
            })
            continue
        }
    }
    return errors
}

// importSegment inserts a single segment and its labels into the DB.
func importSegment(
    ctx context.Context,
    tx *db.LoggedTx,
    seg *utils.Segment,
    segIdx int,
    sf scannedDataFile,
    datasetID string,
    mapping utils.MappingFile,
    filterIDMap map[string]string,
    speciesIDMap map[string]string,
    calltypeIDMap map[string]map[string]string,
) (SegmentImport, map[int]string, int, []ImportSegmentError) {
    var errors []ImportSegmentError

    if seg.StartTime >= seg.EndTime {
        errors = append(errors, ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("invalid segment bounds: start=%.2f >= end=%.2f", seg.StartTime, seg.EndTime),
        })
        return SegmentImport{}, nil, 0, errors
    }
    if seg.EndTime > sf.Duration {
        errors = append(errors, ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("segment end time (%.2f) exceeds file duration (%.2f)", seg.EndTime, sf.Duration),
        })
        return SegmentImport{}, nil, 0, errors
    }

    segmentID, err := utils.GenerateLongID()
    if err != nil {
        errors = append(errors, ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("failed to generate segment ID: %v", err),
        })
        return SegmentImport{}, nil, 0, errors
    }

    _, err = tx.ExecContext(ctx, `
        INSERT INTO segment (id, file_id, dataset_id, start_time, end_time, freq_low, freq_high, created_at, last_modified, active)
        VALUES (?, ?, ?, ?, ?, ?, ?, now(), now(), true)`,
        segmentID, sf.FileID, datasetID, seg.StartTime, seg.EndTime, seg.FreqLow, seg.FreqHigh)
    if err != nil {
        errors = append(errors, ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("failed to insert segment: %v", err),
        })
        return SegmentImport{}, nil, 0, errors
    }

    segImport := SegmentImport{
        SegmentID: segmentID,
        FileName:  filepath.Base(sf.WavPath),
        StartTime: seg.StartTime,
        EndTime:   seg.EndTime,
        FreqLow:   seg.FreqLow,
        FreqHigh:  seg.FreqHigh,
        Labels:    make([]LabelImport, 0),
    }

    labelIDs := make(map[int]string)
    var subtypesImported int
    for labelIdx, label := range seg.Labels {
        result := importSingleLabel(ctx, tx, label, segmentID, segIdx, labelIdx, sf, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
        if result.hasError {
            errors = append(errors, result.err)
            continue
        }
        labelIDs[labelIdx] = result.labelID
        segImport.Labels = append(segImport.Labels, result.labelImport)
        subtypesImported += result.subtypesImported
    }

    return segImport, labelIDs, subtypesImported, errors
}

// importLabelResult holds the result of importing a single label.
type importLabelResult struct {
    labelImport      LabelImport
    labelID          string
    subtypesImported int
    err              ImportSegmentError
    hasError         bool
}

// importSingleLabel inserts a single label and its metadata/subtype into the DB.
func importSingleLabel(
    ctx context.Context,
    tx *db.LoggedTx,
    label *utils.Label,
    segmentID string,
    segIdx, labelIdx int,
    sf scannedDataFile,
    mapping utils.MappingFile,
    filterIDMap map[string]string,
    speciesIDMap map[string]string,
    calltypeIDMap map[string]map[string]string,
) importLabelResult {
    dbSpecies, ok := mapping.GetDBSpecies(label.Species)
    if !ok {
        return importLabelResult{err: ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("species not found in mapping: %s", label.Species),
        }, hasError: true}
    }
    speciesID, ok := speciesIDMap[dbSpecies]
    if !ok {
        return importLabelResult{err: ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("species ID not found: %s", dbSpecies),
        }, hasError: true}
    }
    filterID, ok := filterIDMap[label.Filter]
    if !ok {
        return importLabelResult{err: ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("filter ID not found: %s", label.Filter),
        }, hasError: true}
    }

    labelID, err := utils.GenerateLongID()
    if err != nil {
        return importLabelResult{err: ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("failed to generate label ID: %v", err),
        }, hasError: true}
    }

    _, err = tx.ExecContext(ctx, `
        INSERT INTO label (id, segment_id, species_id, filter_id, certainty, created_at, last_modified, active)
        VALUES (?, ?, ?, ?, ?, now(), now(), true)`,
        labelID, segmentID, speciesID, filterID, label.Certainty)
    if err != nil {
        return importLabelResult{err: ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("failed to insert label: %v", err),
        }, hasError: true}
    }

    // Insert label_metadata if comment exists
    if label.Comment != "" {
        escapedComment := strings.ReplaceAll(label.Comment, `"`, `\"`)
        metadataJSON := fmt.Sprintf(`{"comment": "%s"}`, escapedComment)
        if _, err := tx.ExecContext(ctx, `
            INSERT INTO label_metadata (label_id, json, created_at, last_modified, active)
            VALUES (?, ?, now(), now(), true)`, labelID, metadataJSON); err != nil {
            return importLabelResult{err: ImportSegmentError{
                File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
                Message: fmt.Sprintf("failed to insert label_metadata: %v", err),
            }, hasError: true}
        }
    }

    labelImport := LabelImport{
        LabelID:   labelID,
        Species:   dbSpecies,
        Filter:    label.Filter,
        Certainty: label.Certainty,
    }
    if label.Comment != "" {
        labelImport.Comment = label.Comment
    }

    // Insert label_subtype if calltype exists
    if label.CallType != "" {
        if err := importCalltype(ctx, tx, labelID, label, dbSpecies, filterID, mapping, calltypeIDMap, sf); err != nil {
            return importLabelResult{err: *err, hasError: true}
        }
        labelImport.CallType = mapping.GetDBCalltype(label.Species, label.CallType)
        return importLabelResult{labelImport: labelImport, labelID: labelID, subtypesImported: 1}
    }

    return importLabelResult{labelImport: labelImport, labelID: labelID}
}

// importCalltype inserts a label_subtype row for a calltype label.
func importCalltype(
    ctx context.Context,
    tx *db.LoggedTx,
    labelID string,
    label *utils.Label,
    dbSpecies string,
    filterID string,
    mapping utils.MappingFile,
    calltypeIDMap map[string]map[string]string,
    sf scannedDataFile,
) *ImportSegmentError {
    dbCalltype := mapping.GetDBCalltype(label.Species, label.CallType)
    calltypeID := ""
    if calltypeIDMap[dbSpecies] != nil {
        calltypeID = calltypeIDMap[dbSpecies][dbCalltype]
    }
    if calltypeID == "" {
        return &ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("calltype ID not found: %s/%s", dbSpecies, dbCalltype),
        }
    }

    subtypeID, err := utils.GenerateLongID()
    if err != nil {
        return &ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("failed to generate label_subtype ID: %v", err),
        }
    }

    _, err = tx.ExecContext(ctx, `
        INSERT INTO label_subtype (id, label_id, calltype_id, filter_id, certainty, created_at, last_modified, active)
        VALUES (?, ?, ?, ?, ?, now(), now(), true)`,
        subtypeID, labelID, calltypeID, filterID, label.Certainty)
    if err != nil {
        return &ImportSegmentError{
            File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
            Message: fmt.Sprintf("failed to insert label_subtype: %v", err),
        }
    }
    return nil
}

// segmentValidation holds the results of pre-import validation (phases B+C).
type segmentValidation struct {
    scannedFiles  []scannedDataFile
    filterIDMap   map[string]string
    speciesIDMap  map[string]string
    calltypeIDMap map[string]map[string]string
    fileIDMap     map[string]scannedDataFile
}

// validateAndPrepareSegments performs phases B+C: parse data files, validate DB state, and prepare ID maps.
func validateAndPrepareSegments(
    database *sql.DB,
    input ImportSegmentsInput,
    mapping utils.MappingFile,
    dataFiles []string,
) (*segmentValidation, []ImportSegmentError, error) {
    // Phase B: Parse all .data files and collect unique values
    scannedFiles, parseErrors, uniqueFilters, uniqueSpecies, uniqueCalltypes := scanAllDataFiles(dataFiles, input.Folder)
    if len(scannedFiles) == 0 {
        return nil, parseErrors, nil
    }

    // Validate dataset/location/cluster hierarchy
    if err := validateSegmentHierarchy(database, input.DatasetID, input.LocationID, input.ClusterID); err != nil {
        return nil, parseErrors, err
    }

    // Validate all filters exist
    filterIDMap, err := validateFiltersExist(database, uniqueFilters)
    if err != nil {
        return nil, parseErrors, fmt.Errorf("filter validation failed: %w", err)
    }

    // Validate mapping covers all species/calltypes and they exist in DB
    validationResult, err := utils.ValidateMappingAgainstDB(database, mapping, uniqueSpecies, uniqueCalltypes)
    if err != nil {
        return nil, parseErrors, fmt.Errorf("mapping validation failed: %w", err)
    }
    if validationResult.HasErrors() {
        return nil, parseErrors, fmt.Errorf("mapping validation failed: %s", validationResult.Error())
    }

    // Load species and calltype ID maps
    speciesIDMap, calltypeIDMap, err := loadSpeciesCalltypeIDs(database, mapping, uniqueSpecies, uniqueCalltypes)
    if err != nil {
        return nil, parseErrors, fmt.Errorf("failed to load species/calltype IDs: %w", err)
    }

    // Validate files: hash exists, linked to dataset, no existing labels
    fileIDMap, hashErrors := validateAndMapFiles(database, scannedFiles, input.ClusterID, input.DatasetID)
    allErrors := append(parseErrors, hashErrors...)

    return &segmentValidation{
        scannedFiles:  scannedFiles,
        filterIDMap:   filterIDMap,
        speciesIDMap:  speciesIDMap,
        calltypeIDMap: calltypeIDMap,
        fileIDMap:     fileIDMap,
    }, allErrors, nil
}
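// exampleImportSegments is an illustrative sketch, not part of the original
// file; all paths and IDs are hypothetical placeholders. It shows the full
// hierarchy an import needs: dataset -> location -> cluster, plus a species
// mapping file.
func exampleImportSegments(ctx context.Context) error {
    out, err := ImportSegments(ctx, ImportSegmentsInput{
        DBPath:     "skraak.db",         // hypothetical
        Folder:     "/recordings/site1", // hypothetical folder of WAV + .data pairs
        Mapping:    "mapping.json",      // hypothetical species/calltype mapping
        DatasetID:  "abc123def456",      // hypothetical short IDs
        LocationID: "xyz789uvw012",
        ClusterID:  "qrs345tuv678",
    })
    if err != nil {
        return err
    }
    fmt.Printf("imported %d segment(s), %d label(s)\n",
        out.Summary.ImportedSegments, out.Summary.ImportedLabels)
    return nil
}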
package tools

import (
    "context"
    "database/sql"
    "fmt"
    "os"
    "time"

    "skraak/db"
    "skraak/utils"
)

// ImportAudioFilesInput defines the input parameters for the import_audio_files tool
type ImportAudioFilesInput struct {
    DBPath     string `json:"db_path"`
    FolderPath string `json:"folder_path"`
    DatasetID  string `json:"dataset_id"`
    LocationID string `json:"location_id"`
    ClusterID  string `json:"cluster_id"`
    Recursive  *bool  `json:"recursive,omitempty"` // *bool because default is true; plain bool would make "not provided" indistinguishable from "false"
}

// ImportAudioFilesOutput defines the output structure for the import_audio_files tool
type ImportAudioFilesOutput struct {
    Summary ImportSummary           `json:"summary"`
    FileIDs []string                `json:"file_ids"`
    Errors  []utils.FileImportError `json:"errors,omitempty"`
}

// ImportSummary provides summary statistics for the import operation
type ImportSummary struct {
    TotalFiles     int     `json:"total_files"`
    ImportedFiles  int     `json:"imported_files"`
    SkippedFiles   int     `json:"skipped_files"` // Duplicates
    FailedFiles    int     `json:"failed_files"`
    AudioMothFiles int     `json:"audiomoth_files"`
    TotalDuration  float64 `json:"total_duration_seconds"`
    ProcessingTime string  `json:"processing_time"`
}

// ImportAudioFiles batch imports WAV files from a folder with hash-based duplicate detection
func ImportAudioFiles(
    ctx context.Context,
    input ImportAudioFilesInput,
) (ImportAudioFilesOutput, error) {
    startTime := time.Now()
    var output ImportAudioFilesOutput

    // Default recursive to true
    recursive := true
    if input.Recursive != nil {
        recursive = *input.Recursive
    }

    // Validate database hierarchy (dataset → location → cluster)
    if err := validateImportInput(input, resolveDBPath(input.DBPath)); err != nil {
        return output, fmt.Errorf("validation failed: %w", err)
    }

    // Open database
    database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
    if err != nil {
        return output, fmt.Errorf("failed to open database: %w", err)
    }
    defer database.Close()

    // Set cluster path if empty
    err = utils.EnsureClusterPath(database, input.ClusterID, input.FolderPath)
    if err != nil {
        return output, fmt.Errorf("failed to set cluster path: %w", err)
    }

    // Import the cluster (ALL THE LOGIC IS HERE)
    tx, err := db.BeginLoggedTx(ctx, database, "import_audio_files")
    if err != nil {
        return output, fmt.Errorf("failed to begin transaction: %w", err)
    }
    clusterOutput, err := utils.ImportCluster(database, tx.UnderlyingTx(), utils.ClusterImportInput{
        FolderPath: input.FolderPath,
        DatasetID:  input.DatasetID,
        LocationID: input.LocationID,
        ClusterID:  input.ClusterID,
        Recursive:  recursive,
    })
    if err != nil {
        tx.Rollback()
        return output, fmt.Errorf("cluster import failed: %w", err)
    }
    if err := tx.Commit(); err != nil {
        return output, fmt.Errorf("transaction commit failed: %w", err)
    }

    // Map to output format
    output = ImportAudioFilesOutput{
        Summary: ImportSummary{
            TotalFiles:     clusterOutput.TotalFiles,
            ImportedFiles:  clusterOutput.ImportedFiles,
            SkippedFiles:   clusterOutput.SkippedFiles,
            FailedFiles:    clusterOutput.FailedFiles,
            AudioMothFiles: clusterOutput.AudioMothFiles,
            TotalDuration:  clusterOutput.TotalDuration,
            ProcessingTime: time.Since(startTime).String(),
        },
        FileIDs: []string{}, // File IDs not tracked currently
        Errors:  clusterOutput.Errors,
    }
    return output, nil
}

// validateImportInput validates all input parameters and database relationships
func validateImportInput(input ImportAudioFilesInput, dbPath string) error {
    // Verify folder exists
    info, err := os.Stat(input.FolderPath)
    if err != nil {
        return fmt.Errorf("folder not accessible: %w", err)
    }
    if !info.IsDir() {
        return fmt.Errorf("path is not a directory: %s", input.FolderPath)
    }
    return validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, dbPath)
}

// validateHierarchyIDs validates dataset/location/cluster ID formats and database relationships
func validateHierarchyIDs(datasetID, locationID, clusterID, dbPath string) error {
    // Validate ID formats first (fast fail before DB queries)
    if err := utils.ValidateShortID(datasetID, "dataset_id"); err != nil {
        return err
    }
    if err := utils.ValidateShortID(locationID, "location_id"); err != nil {
        return err
    }
    if err := utils.ValidateShortID(clusterID, "cluster_id"); err != nil {
        return err
    }
    return db.WithReadDB(dbPath, func(database *sql.DB) error {
        // Verify dataset exists, is active, and is 'structured' type
        if err := db.ValidateDatasetTypeForImport(database, datasetID); err != nil {
            return err
        }
        // Verify location exists and belongs to dataset
        if err := db.ValidateLocationBelongsToDataset(database, locationID, datasetID); err != nil {
            return err
        }
        // Verify cluster exists and belongs to location
        if err := db.ClusterBelongsToLocation(database, clusterID, locationID); err != nil {
            return err
        }
        return nil
    })
}
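// exampleImportAudioFiles is an illustrative sketch, not part of the original
// file; the DB path, folder, and IDs are hypothetical placeholders.
func exampleImportAudioFiles(ctx context.Context) error {
    out, err := ImportAudioFiles(ctx, ImportAudioFilesInput{
        DBPath:     "skraak.db",         // hypothetical
        FolderPath: "/recordings/site1", // hypothetical
        DatasetID:  "abc123def456",      // hypothetical short IDs
        LocationID: "xyz789uvw012",
        ClusterID:  "qrs345tuv678",
        // Recursive left nil: defaults to true
    })
    if err != nil {
        return err
    }
    fmt.Printf("imported %d, skipped %d duplicate(s), failed %d\n",
        out.Summary.ImportedFiles, out.Summary.SkippedFiles, out.Summary.FailedFiles)
    return nil
}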
package toolsimport ("context""database/sql""fmt""os""path/filepath""strings""time""skraak/db""skraak/utils")// ImportFileInput defines the input parameters for the import_file tooltype ImportFileInput struct {FilePath string `json:"file_path"`DatasetID string `json:"dataset_id"`LocationID string `json:"location_id"`ClusterID string `json:"cluster_id"`}// ImportFileOutput defines the output structure for the import_file tooltype ImportFileOutput struct {FileID string `json:"file_id"`FileName string `json:"file_name"`Hash string `json:"hash"`Duration float64 `json:"duration_seconds"`SampleRate int `json:"sample_rate"`TimestampLocal time.Time `json:"timestamp_local"`IsAudioMoth bool `json:"is_audiomoth"`IsDuplicate bool `json:"is_duplicate"`ProcessingTime string `json:"processing_time"`Error *string `json:"error,omitempty"`}// ImportFile imports a single WAV file into the database with duplicate detectionfunc ImportFile(ctx context.Context,input ImportFileInput,) (ImportFileOutput, error) {startTime := time.Now()var output ImportFileOutput// Phase 1: Validate file path_, err := validateFilePath(input.FilePath)if err != nil {return output, fmt.Errorf("file validation failed: %w", err)}output.FileName = filepath.Base(input.FilePath)// Phase 2: Validate database hierarchyreturn output, fmt.Errorf("hierarchy validation failed: %w", err)}// Phase 3: Open database connection (single connection for all DB operations)if err != nil {return output, fmt.Errorf("database connection failed: %w", err)}defer database.Close()// Phase 4: Get location data for astronomical calculationslocData, err := utils.GetLocationData(database, input.LocationID)if err != nil {return output, fmt.Errorf("failed to get location data: %w", err)}// Phase 5: Process file metadataresult, err := utils.ProcessSingleFile(input.FilePath, locData.Latitude, locData.Longitude, locData.TimezoneID, true)if err != nil {errMsg := err.Error()output.Error = &errMsgoutput.ProcessingTime = time.Since(startTime).String()return output, fmt.Errorf("file processing failed: %w", err)}// Populate output with extracted metadataoutput.FileName = result.FileNameoutput.Hash = result.Hashoutput.Duration = result.Durationoutput.SampleRate = result.SampleRateoutput.TimestampLocal = result.TimestampLocaloutput.IsAudioMoth = result.IsAudioMoth// Phase 6: Ensure cluster path is setif err := utils.EnsureClusterPath(database, input.ClusterID, filepath.Dir(input.FilePath)); err != nil {return output, fmt.Errorf("failed to set cluster path: %w", err)}// Phase 7: Insert into databasefileID, isDuplicate, err := insertFileIntoDB(ctx, database, result, input.DatasetID, input.ClusterID, input.LocationID)if err != nil {errMsg := err.Error()output.Error = &errMsgoutput.ProcessingTime = time.Since(startTime).String()return output, fmt.Errorf("database insertion failed: %w", err)}output.FileID = fileIDoutput.IsDuplicate = isDuplicateoutput.ProcessingTime = time.Since(startTime).String()return output, nil}// validateFilePath validates the file exists, is a regular file, is a WAV file, and is not emptyfunc validateFilePath(filePath string) (os.FileInfo, error) {// Check file existsinfo, err := os.Stat(filePath)if err != nil {if os.IsNotExist(err) {return nil, fmt.Errorf("file does not exist: %s", filePath)}return nil, fmt.Errorf("cannot access file: %w", err)}// Check it's a regular fileif !info.Mode().IsRegular() {return nil, fmt.Errorf("path is not a regular file: %s", filePath)}// Check extension is .wav (case-insensitive)ext := strings.ToLower(filepath.Ext(filePath))if 
ext != ".wav" {return nil, fmt.Errorf("file must be a WAV file (got extension: %s)", ext)}// Check file is not emptyif info.Size() == 0 {return nil, fmt.Errorf("file is empty: %s", filePath)}return info, nil}// insertFileIntoDB inserts a single file into the database// Returns (fileID, isDuplicate, error)func insertFileIntoDB(ctx context.Context,database *sql.DB,result *utils.FileProcessingResult,datasetID, clusterID, locationID string,) (string, bool, error) {// Begin logged transactiontx, err := db.BeginLoggedTx(ctx, database, "import_audio_file")if err != nil {return "", false, fmt.Errorf("failed to begin transaction: %w", err)}defer tx.Rollback() // Rollback if not committed// Check for duplicate hashexistingID, isDup, err := utils.CheckDuplicateHash(tx, result.Hash)if err != nil {return "", false, err}if isDup {return existingID, true, nil}// Generate file IDfileID, err := utils.GenerateLongID()if err != nil {return "", false, fmt.Errorf("ID generation failed: %w", err)}// Insert file record_, err = tx.ExecContext(ctx, `INSERT INTO file (id, file_name, xxh64_hash, location_id, timestamp_local,cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,moon_phase, created_at, last_modified, active) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)`,fileID, result.FileName, result.Hash, locationID,result.TimestampLocal, clusterID, result.Duration, result.SampleRate,result.AstroData.SolarNight, result.AstroData.CivilNight, result.AstroData.MoonPhase,)if err != nil {return "", false, fmt.Errorf("file insert failed: %w", err)}// Insert file_dataset junction_, err = tx.ExecContext(ctx, `INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)VALUES (?, ?, now(), now())`, fileID, datasetID)if err != nil {return "", false, fmt.Errorf("file_dataset insert failed: %w", err)}// If AudioMoth, insert moth_metadataif result.IsAudioMoth && result.MothData != nil {_, err = tx.ExecContext(ctx, `INSERT INTO moth_metadata (file_id, timestamp, recorder_id, gain, battery_v, temp_c,created_at, last_modified, active) VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)`,fileID,result.MothData.Timestamp,&result.MothData.RecorderID,&result.MothData.Gain,&result.MothData.BatteryV,&result.MothData.TempC,)if err != nil {return "", false, fmt.Errorf("moth_metadata insert failed: %w", err)}}// Commit transactionif err = tx.Commit(); err != nil {return "", false, fmt.Errorf("transaction commit failed: %w", err)}return fileID, false, nil}database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))if err := validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, resolveDBPath(input.DBPath)); err != nil {DBPath string `json:"db_path"`
package toolsimport ("sort""strings""skraak/utils")// CallsSummariseInput defines the input for the calls-summarise tooltype CallsSummariseInput struct {Folder string `json:"folder"`Brief bool `json:"brief"`Filter string `json:"filter,omitempty"`}// CallsSummariseOutput defines the output for the calls-summarise tooltype CallsSummariseOutput struct {Segments []SegmentSummary `json:"segments"`Folder string `json:"folder"`DataFilesRead int `json:"data_files_read"`DataFilesSkipped []string `json:"data_files_skipped"`TotalSegments int `json:"total_segments"`Filters map[string]FilterStats `json:"filters"`ReviewStatus ReviewStatus `json:"review_status"`Operators []string `json:"operators"`Reviewers []string `json:"reviewers"`Error *string `json:"error,omitempty"`}// SegmentSummary represents a single segment in the outputtype SegmentSummary struct {File string `json:"file"`StartTime float64 `json:"start_time"`EndTime float64 `json:"end_time"`Labels []LabelSummary `json:"labels"`}// LabelSummary represents a label in the output (omits empty fields)type LabelSummary struct {Filter string `json:"filter"`Certainty int `json:"certainty"`Species string `json:"species"`CallType string `json:"calltype,omitempty"`Comment string `json:"comment,omitempty"`Bookmark bool `json:"bookmark,omitempty"`}// FilterStats contains per-filter statisticstype FilterStats struct {Segments int `json:"segments"`Species map[string]int `json:"species"`Calltypes map[string]map[string]int `json:"calltypes,omitempty"` // species -> calltype -> count}// ReviewStatus contains review progress statisticstype ReviewStatus struct {Unreviewed int `json:"unreviewed"` // certainty < 100Confirmed int `json:"confirmed"` // certainty = 100DontKnow int `json:"dont_know"` // certainty = 0WithCallType int `json:"with_calltype"`WithComments int `json:"with_comments"`Bookmarked int `json:"bookmarked"`}// CallsSummarise reads all .data files in a folder and produces a summaryfunc CallsSummarise(input CallsSummariseInput) (CallsSummariseOutput, error) {var output CallsSummariseOutput// Find all .data filesfilePaths, err := utils.FindDataFiles(input.Folder)if err != nil {errMsg := err.Error()output.Error = &errMsgreturn output, err}// Initialize empty slices/maps (avoid null in JSON)output.Segments = make([]SegmentSummary, 0)output.Folder = input.Folderoutput.Filters = make(map[string]FilterStats)output.Operators = make([]string, 0)output.Reviewers = make([]string, 0)output.DataFilesSkipped = make([]string, 0)if len(filePaths) == 0 {return output, nil}// Track unique operators and reviewersoperatorSet := make(map[string]bool)reviewerSet := make(map[string]bool)// Count segments for totalif input.Brief {for _, fs := range output.Filters {output.TotalSegments += fs.Segments}} else {output.TotalSegments = len(output.Segments)}finaliseSummary(&output, operatorSet, reviewerSet, input.Brief)return output, nil}// summariseFiles processes all data files, populating output statsfunc summariseFiles(filePaths []string, input CallsSummariseInput, output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool) {for _, path := range filePaths {df, err := utils.ParseDataFile(path)if err != nil {output.DataFilesSkipped = append(output.DataFilesSkipped, path)continue}output.DataFilesRead++trackMeta(df.Meta, operatorSet, reviewerSet)var relPath stringif !input.Brief {relPath = extractRelativePath(input.Folder, path)}for _, seg := range df.Segments {filteredLabels := filterLabels(seg.Labels, input.Filter)if input.Filter != "" && len(filteredLabels) == 0 
{continue}updateStatsFromLabels(filteredLabels, output)if !input.Brief {output.Segments = append(output.Segments, SegmentSummary{File: relPath,StartTime: seg.StartTime,EndTime: seg.EndTime,Labels: buildLabelSummaries(filteredLabels),})}// trackMeta records operator and reviewer from file metadatafunc trackMeta(meta *utils.DataMeta, operatorSet, reviewerSet map[string]bool) {if meta == nil {return}if meta.Operator != "" {operatorSet[meta.Operator] = true}if meta.Reviewer != "" {reviewerSet[meta.Reviewer] = true}}// filterLabels returns labels matching the filter, or all labels if filter is emptyfunc filterLabels(labels []*utils.Label, filter string) []*utils.Label {if filter == "" {return labels}var filtered []*utils.Labelfor _, l := range labels {if l.Filter == filter {filtered = append(filtered, l)}}return filtered}// buildLabelSummaries converts labels to label summariesfunc buildLabelSummaries(labels []*utils.Label) []LabelSummary {var summaries []LabelSummaryfor _, l := range labels {ls := LabelSummary{Filter: l.Filter,Certainty: l.Certainty,Species: l.Species,}if l.CallType != "" {ls.CallType = l.CallType}if l.Comment != "" {ls.Comment = l.Comment}if l.Bookmark {ls.Bookmark = true}summaries = append(summaries, ls)}return summaries}// updateStatsFromLabels updates filter stats and review status from a set of labelsfunc updateStatsFromLabels(labels []*utils.Label, output *CallsSummariseOutput) {for _, l := range labels {updateFilterStats(l, output)updateReviewStatus(l, output)}}// updateFilterStats increments filter-level statistics for a single labelfunc updateFilterStats(l *utils.Label, output *CallsSummariseOutput) {fs, exists := output.Filters[l.Filter]if !exists {fs = FilterStats{Segments: 0,Species: make(map[string]int),Calltypes: make(map[string]map[string]int),}}if l.CallType != "" {if fs.Calltypes[l.Species] == nil {fs.Calltypes[l.Species] = make(map[string]int)}fs.Calltypes[l.Species][l.CallType]++}output.Filters[l.Filter] = fs}// updateReviewStatus increments review status counters for a single labelfunc updateReviewStatus(l *utils.Label, output *CallsSummariseOutput) {switch l.Certainty {case 100:output.ReviewStatus.Confirmed++case 0:output.ReviewStatus.DontKnow++default:output.ReviewStatus.Unreviewed++}if l.CallType != "" {output.ReviewStatus.WithCallType++}if l.Comment != "" {output.ReviewStatus.WithComments++}if l.Bookmark {output.ReviewStatus.Bookmarked++}// finaliseSummary sorts output, cleans empty maps, and converts sets to sorted slicesfunc finaliseSummary(output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool, brief bool) {// Clean up empty calltypes mapsfor filter, fs := range output.Filters {if len(fs.Calltypes) == 0 {fs.Calltypes = niloutput.Filters[filter] = fs}}// Convert sets to sorted slicesfor op := range operatorSet {output.Operators = append(output.Operators, op)}for r := range reviewerSet {output.Reviewers = append(output.Reviewers, r)}sort.Strings(output.Operators)sort.Strings(output.Reviewers)// Sort segments by file, then start timeif !brief {sort.Slice(output.Segments, func(i, j int) bool {if output.Segments[i].File != output.Segments[j].File {return output.Segments[i].File < output.Segments[j].File}return output.Segments[i].StartTime < output.Segments[j].StartTime})}}// extractRelativePath extracts the audio filename from a .data file path// e.g., "/folder/tx51_LISTENING_20260221_203004.WAV.data" -> "tx51_LISTENING_20260221_203004.WAV"// Preserves the original case of the extension as-is.func extractRelativePath(folder, dataPath 
string) string {// Get the filenamefilename := dataPathif idx := strings.LastIndex(dataPath, "/"); idx >= 0 {filename = dataPath[idx+1:]}// Remove .data extension, preserve everything elsereturn strings.TrimSuffix(filename, ".data")}}fs.Segments++fs.Species[l.Species]++}}}summariseFiles(filePaths, input, &output, operatorSet, reviewerSet)
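// Usage sketch (illustrative; the folder path is a placeholder):
//
//	out, err := CallsSummarise(CallsSummariseInput{Folder: "/recordings/2026-02", Brief: true})
//	if err != nil {
//		log.Fatal(err)
//	}
//	for filter, fs := range out.Filters {
//		fmt.Printf("%s: %d segments, %d species\n", filter, fs.Segments, len(fs.Species))
//	}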
package toolsimport ("fmt""os""strings""skraak/utils")// CallsShowImagesInput defines the input for the show-images tooltype CallsShowImagesInput struct {DataFilePath string `json:"data_file_path"`Color bool `json:"color"`ImageSize int `json:"image_size"`Sixel bool `json:"sixel"`ITerm bool `json:"iterm"`}// CallsShowImagesOutput defines the output for the show-images tooltype CallsShowImagesOutput struct {SegmentsShown int `json:"segments_shown"`WavFile string `json:"wav_file"`Error string `json:"error,omitempty"`}// CallsShowImages reads a .data file and displays spectrogram images for each segmentfunc CallsShowImages(input CallsShowImagesInput) (CallsShowImagesOutput, error) {var output CallsShowImagesOutput// Validate file existsif _, err := os.Stat(input.DataFilePath); os.IsNotExist(err) {output.Error = fmt.Sprintf("File not found: %s", input.DataFilePath)return output, fmt.Errorf("%s", output.Error)}// Derive WAV file path (strip .data suffix)wavPath := strings.TrimSuffix(input.DataFilePath, ".data")output.WavFile = wavPath// Check WAV file existsif _, err := os.Stat(wavPath); os.IsNotExist(err) {output.Error = fmt.Sprintf("WAV file not found: %s", wavPath)return output, fmt.Errorf("%s", output.Error)}// Parse .data file (includes labels for future filtering)dataFile, err := utils.ParseDataFile(input.DataFilePath)if err != nil {output.Error = err.Error()return output, fmt.Errorf("%s", output.Error)}if len(dataFile.Segments) == 0 {output.Error = "No segments found in .data file"return output, fmt.Errorf("%s", output.Error)}// Resolve image sizeimgSize := input.ImageSizeif imgSize == 0 {imgSize = utils.SpectrogramDisplaySize}// Select graphics protocolprotocol := utils.ProtocolKittyif input.ITerm {protocol = utils.ProtocolITerm} else if input.Sixel {protocol = utils.ProtocolSixel}// Generate spectrogram for each segment and outputfor i, seg := range dataFile.Segments {// Generate spectrogram imageimg, err := utils.GenerateSegmentSpectrogram(input.DataFilePath, seg.StartTime, seg.EndTime, input.Color, imgSize)if err != nil || img == nil {continue}// Print segment infolabelInfo := formatSegmentLabels(seg.Labels)fmt.Fprintf(os.Stderr, "Segment %d: %.1fs - %.1fs (%.1fs)%s\n",i+1, seg.StartTime, seg.EndTime, seg.EndTime-seg.StartTime, labelInfo)// Write to stdout via terminal graphics protocolif err := utils.WriteImage(img, os.Stdout, protocol); err != nil {output.Error = fmt.Sprintf("Failed to write image: %v", err)return output, fmt.Errorf("%s", output.Error)}fmt.Println() // Newline after image}output.SegmentsShown = len(dataFile.Segments)return output, nil}// formatSegmentLabels formats labels for display in segment infofunc formatSegmentLabels(labels []*utils.Label) string {if len(labels) == 0 {return ""}var parts []stringfor _, l := range labels {part := l.Speciesif l.CallType != "" {part += "/" + l.CallType}if l.Filter != "" {part += " [" + l.Filter + "]"}parts = append(parts, part)}return " " + strings.Join(parts, ", ")}
package toolsimport ("encoding/json""os""path/filepath""testing""skraak/utils")func TestPushCertaintyPromotesMatchingLabels(t *testing.T) {tempDir := t.TempDir()// File with two Kiwi segments: certainty=90 and certainty=70file1 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]], [10, 20, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`file1Path := filepath.Join(tempDir, "file1.data")if err := os.WriteFile(file1Path, []byte(file1), 0644); err != nil {t.Fatal(err)}// File with one Tomtit at certainty=90 (must not be promoted when species=Kiwi)file2 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`file2Path := filepath.Join(tempDir, "file2.data")if err := os.WriteFile(file2Path, []byte(file2), 0644); err != nil {t.Fatal(err)}result, err := PushCertainty(PushCertaintyConfig{Folder: tempDir,Species: "Kiwi",Reviewer: "TestReviewer",})if err != nil {t.Fatal(err)}if result.SegmentsUpdated != 1 {t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)}if result.FilesUpdated != 1 {t.Errorf("expected 1 file updated, got %d", result.FilesUpdated)}// Verify file1: certainty=90 Kiwi → 100, certainty=70 Kiwi → unchangeddf, err := utils.ParseDataFile(file1Path)if err != nil {t.Fatal(err)}if df.Segments[0].Labels[0].Certainty != 100 {t.Errorf("expected certainty=100, got %d", df.Segments[0].Labels[0].Certainty)}if df.Segments[1].Labels[0].Certainty != 70 {t.Errorf("expected certainty=70 unchanged, got %d", df.Segments[1].Labels[0].Certainty)}if df.Meta.Reviewer != "TestReviewer" {t.Errorf("expected reviewer=TestReviewer, got %q", df.Meta.Reviewer)}// Verify Tomtit file was not modifieddf2, err := utils.ParseDataFile(file2Path)if err != nil {t.Fatal(err)}if df2.Segments[0].Labels[0].Certainty != 90 {t.Errorf("Tomtit certainty should be unchanged at 90, got %d", df2.Segments[0].Labels[0].Certainty)}}func TestPushCertaintyFilterScope(t *testing.T) {tempDir := t.TempDir()// Segment has two labels from different filters, both Kiwi certainty=90data := []any{map[string]any{"Operator": "test"},[]any{0.0, 10.0, 100.0, 1000.0, []any{map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-a"},map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-b"},}},}raw, _ := json.Marshal(data)filePath := filepath.Join(tempDir, "file1.data")if err := os.WriteFile(filePath, raw, 0644); err != nil {t.Fatal(err)}// Push only model-aresult, err := PushCertainty(PushCertaintyConfig{Folder: tempDir,Filter: "model-a",Species: "Kiwi",Reviewer: "TestReviewer",})if err != nil {t.Fatal(err)}if result.SegmentsUpdated != 1 {t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)}// Verify only model-a label was promoted; model-b stays at 90df, err := utils.ParseDataFile(filePath)if err != nil {t.Fatal(err)}for _, label := range df.Segments[0].Labels {if label.Filter == "model-a" && label.Certainty != 100 {t.Errorf("model-a label should be 100, got %d", label.Certainty)}if label.Filter == "model-b" && label.Certainty != 90 {t.Errorf("model-b label should be unchanged at 90, got %d", label.Certainty)}}}
package toolsimport ("fmt""skraak/utils")// PushCertaintyConfig holds the configuration for push-certaintytype PushCertaintyConfig struct {Folder stringFile stringFilter stringSpecies stringCallType stringNight boolDay boolLat float64Lng float64Timezone stringReviewer string}// PushCertaintyResult holds the result of push-certaintytype PushCertaintyResult struct {SegmentsUpdated int `json:"segments_updated"`FilesUpdated int `json:"files_updated"`TimeFilteredCount int `json:"time_filtered_count"`}// PushCertainty promotes all certainty=90 segments matching the filter scope to certainty=100.// Uses identical filtering logic to LoadDataFiles so the scope matches calls classify exactly.func PushCertainty(config PushCertaintyConfig) (*PushCertaintyResult, error) {state, err := LoadDataFiles(ClassifyConfig{Folder: config.Folder,File: config.File,Filter: config.Filter,Species: config.Species,CallType: config.CallType,Certainty: 90,Sample: -1,Night: config.Night,Day: config.Day,Lat: config.Lat,Lng: config.Lng,Timezone: config.Timezone,})if err != nil {return nil, err}var segsUpdated, filesUpdated intfor i, df := range state.DataFiles {changed := falsefor _, seg := range state.FilteredSegs()[i] {for _, label := range seg.Labels {if labelMatchesPush(label, config.Filter, config.Species, config.CallType) {label.Certainty = 100changed = truesegsUpdated++}}}if changed {df.Meta.Reviewer = config.Reviewerif err := df.Write(df.FilePath); err != nil {return nil, fmt.Errorf("write %s: %w", df.FilePath, err)}filesUpdated++}}return &PushCertaintyResult{SegmentsUpdated: segsUpdated,FilesUpdated: filesUpdated,TimeFilteredCount: state.TimeFilteredCount,}, nil}// labelMatchesPush returns true if the label matches the push scope and has certainty=90.// Certainty is already guaranteed by LoadDataFiles, but we re-check to target only the// specific label that matched (a segment may carry labels from multiple filters).func labelMatchesPush(label *utils.Label, filter, species, callType string) bool {if filter != "" && label.Filter != filter {return false}if species != "" && label.Species != species {return false}if callType != "" && label.CallType != callType {return false}return label.Certainty == 90}
package toolsimport ("path/filepath""testing""skraak/utils")// helpersfunc seg(start, end float64, labels ...*utils.Label) *utils.Segment {return &utils.Segment{StartTime: start,EndTime: end,FreqLow: 100,FreqHigh: 8000,Labels: labels,}}func lbl(filter, species, calltype string, certainty int) *utils.Label {return &utils.Label{Filter: filter,Species: species,CallType: calltype,Certainty: certainty,}}func writeFile(t *testing.T, segs ...*utils.Segment) string {t.Helper()dir := t.TempDir()path := filepath.Join(dir, "test.data")df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600},Segments: segs,}if err := df.Write(path); err != nil {t.Fatalf("write fixture: %v", err)}return path}func readFile(t *testing.T, path string) *utils.DataFile {t.Helper()df, err := utils.ParseDataFile(path)if err != nil {t.Fatalf("parse %s: %v", path, err)}return df}// findLabel returns the label with matching filter and time on the parsed file, or nil.func findLabel(df *utils.DataFile, filter string, start, end float64) *utils.Label {for _, s := range df.Segments {if s.StartTime != start || s.EndTime != end {continue}for _, l := range s.Labels {if l.Filter == filter {return l}}}return nil}const (fFrom = "opensoundscape-kiwi-1.2"fTo = "opensoundscape-kiwi-1.5")func TestPropagate_HappyPathSingle(t *testing.T) {path := writeFile(t,seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v (%s)", err, out.Error)}if out.Propagated != 1 || out.TargetsExamined != 1 || out.SkippedConflict != 0 || out.SkippedNoOverlap != 0 {t.Fatalf("counts wrong: %+v", out)}df := readFile(t, path)target := findLabel(df, fTo, 100, 125)if target == nil {t.Fatal("target label missing")}if target.Species != "Kiwi" || target.CallType != "Male" || target.Certainty != 90 {t.Errorf("target not updated correctly: species=%q calltype=%q cert=%d", target.Species, target.CallType, target.Certainty)}if df.Meta.Reviewer != "Skraak" {t.Errorf("reviewer = %q, want Skraak", df.Meta.Reviewer)}}func TestPropagate_NoOverlap(t *testing.T) {path := writeFile(t,seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.Propagated != 0 || out.TargetsExamined != 1 || out.SkippedNoOverlap != 1 {t.Fatalf("counts wrong: %+v", out)}df := readFile(t, path)target := findLabel(df, fTo, 500, 525)if target.Certainty != 70 {t.Errorf("target should not be modified, cert=%d", target.Certainty)}if df.Meta.Reviewer != "David" {t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)}}func TestPropagate_SourceWrongSpecies_Ignored(t *testing.T) {path := writeFile(t,seg(100, 125, lbl(fFrom, "Weka", "", 100)),seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.Propagated != 0 || out.SkippedNoOverlap != 1 {t.Fatalf("counts wrong: %+v", out)}}func TestPropagate_SourceWrongCertainty_Ignored(t *testing.T) {// cert=70 and cert=0 source labels must NOT count as sources.path := writeFile(t,seg(100, 125, lbl(fFrom, "Kiwi", "Male", 70)),seg(200, 225, lbl(fFrom, "Don't Know", "", 
0)),seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),seg(200, 225, lbl(fTo, "Kiwi", "Male", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.Propagated != 0 || out.SkippedNoOverlap != 2 {t.Fatalf("counts wrong: %+v", out)}}func TestPropagate_SourceWrongFilter_Ignored(t *testing.T) {path := writeFile(t,seg(100, 125, lbl("some-other-filter", "Kiwi", "Male", 100)),seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if !out.FiltersMissing || out.Propagated != 0 || out.TargetsExamined != 0 {t.Fatalf("expected FiltersMissing=true with zero counts, got: %+v", out)}}func TestPropagate_TargetCert100_NotTouched(t *testing.T) {// Target with cert=100 is human-verified — must NOT be overwritten.path := writeFile(t,seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),seg(100, 125, lbl(fTo, "Kiwi", "Male", 100)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.TargetsExamined != 0 || out.Propagated != 0 {t.Fatalf("cert=100 target must not be examined: %+v", out)}df := readFile(t, path)if df.Meta.Reviewer != "David" {t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)}}func TestPropagate_TargetCert90_NotTouched(t *testing.T) {// Target with cert=90 (already propagated earlier) must NOT be re-propagated.path := writeFile(t,seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),seg(100, 125, lbl(fTo, "Kiwi", "Female", 90)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.TargetsExamined != 0 || out.Propagated != 0 {t.Fatalf("cert=90 target must not be examined: %+v", out)}df := readFile(t, path)target := findLabel(df, fTo, 100, 125)if target.Certainty != 90 || target.CallType != "Female" {t.Errorf("cert=90 target was modified: %+v", target)}}func TestPropagate_TargetCert0_Propagated(t *testing.T) {// Target at cert=0 ("Don't Know" / "Noise") SHOULD be propagated when an// overlapping cert=100 source exists — rescues labels from the noise bucket// so they surface for review even if occasionally wrong.path := writeFile(t,seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),seg(100, 125, lbl(fTo, "Don't Know", "", 0)),seg(200, 225, lbl(fFrom, "Kiwi", "Female", 100)),seg(200, 225, lbl(fTo, "Noise", "", 0)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.TargetsExamined != 2 || out.Propagated != 2 {t.Fatalf("cert=0 targets must be propagated: %+v", out)}df := readFile(t, path)for _, c := range []struct {start, end float64calltype string}{{100, 125, "Male"}, {200, 225, "Female"}} {l := findLabel(df, fTo, c.start, c.end)if l == nil || l.Species != "Kiwi" || l.CallType != c.calltype || l.Certainty != 90 {t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", c.start, c.end, l, c.calltype)}}}func TestPropagate_MultipleSourcesAgree(t *testing.T) {// Two overlapping sources with same calltype → propagate.path := writeFile(t,seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),seg(105, 120, lbl(fFrom, "Kiwi", "Male", 100)),seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),)out, err := 
CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.Propagated != 1 || out.SkippedConflict != 0 {t.Fatalf("counts wrong: %+v", out)}df := readFile(t, path)target := findLabel(df, fTo, 100, 125)if target.CallType != "Male" {t.Errorf("calltype should be Male, got %q", target.CallType)}}func TestPropagate_MultipleSourcesConflict(t *testing.T) {// Two overlapping sources with different calltypes → conflict, skip, report.path := writeFile(t,seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),seg(115, 120, lbl(fFrom, "Kiwi", "Female", 100)),seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.Propagated != 0 || out.SkippedConflict != 1 {t.Fatalf("expected 1 conflict skip: %+v", out)}if len(out.Conflicts) != 1 {t.Fatalf("expected 1 conflict report, got %d", len(out.Conflicts))}if out.Conflicts[0].TargetStart != 100 || out.Conflicts[0].TargetEnd != 125 {t.Errorf("conflict target wrong: %+v", out.Conflicts[0])}if len(out.Conflicts[0].SourceChoices) != 2 {t.Errorf("expected 2 source choices, got %d", len(out.Conflicts[0].SourceChoices))}// Target must NOT be modified.df := readFile(t, path)target := findLabel(df, fTo, 100, 125)if target.CallType != "Duet" || target.Certainty != 70 {t.Errorf("conflicted target was modified: %+v", target)}if df.Meta.Reviewer != "David" {t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)}}func TestPropagate_EmptyCallTypePropagates(t *testing.T) {// Source with empty calltype → target gets empty calltype.path := writeFile(t,seg(100, 125, lbl(fFrom, "Kiwi", "", 100)),seg(100, 125, lbl(fTo, "Kiwi", "Male", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.Propagated != 1 {t.Fatalf("expected propagated=1: %+v", out)}df := readFile(t, path)target := findLabel(df, fTo, 100, 125)if target.CallType != "" {t.Errorf("calltype should be cleared, got %q", target.CallType)}if target.Species != "Kiwi" || target.Certainty != 90 {t.Errorf("target fields wrong: %+v", target)}}func TestPropagate_SpeciesOverride(t *testing.T) {// Target species was different from --species; must be overwritten.path := writeFile(t,seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),seg(100, 125, lbl(fTo, "Don't Know", "", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.Propagated != 1 {t.Fatalf("expected propagated=1: %+v", out)}df := readFile(t, path)target := findLabel(df, fTo, 100, 125)if target.Species != "Kiwi" || target.CallType != "Male" || target.Certainty != 90 {t.Errorf("target not overwritten correctly: %+v", target)}}func TestPropagate_OverlapBoundaryExclusive(t *testing.T) {// Segments touching at a point (src ends exactly where tgt starts) do NOT overlap.path := writeFile(t,seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.Propagated != 0 || out.SkippedNoOverlap != 1 {t.Fatalf("touching boundary must not count as overlap: %+v", out)}}func 
TestPropagate_OverlapPartial(t *testing.T) {// 1-second overlap is enough.path := writeFile(t,seg(100, 126, lbl(fFrom, "Kiwi", "Male", 100)),seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.Propagated != 1 {t.Fatalf("expected propagated=1: %+v", out)}}func TestPropagate_SupersetEitherDirection(t *testing.T) {// Source engulfs target.path1 := writeFile(t,seg(100, 200, lbl(fFrom, "Kiwi", "Male", 100)),seg(110, 150, lbl(fTo, "Kiwi", "Duet", 70)),)if out, _ := CallsPropagate(CallsPropagateInput{File: path1, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}); out.Propagated != 1 {t.Errorf("source-engulfs-target: %+v", out)}// Target engulfs source.path2 := writeFile(t,seg(110, 150, lbl(fFrom, "Kiwi", "Male", 100)),seg(100, 200, lbl(fTo, "Kiwi", "Duet", 70)),)if out, _ := CallsPropagate(CallsPropagateInput{File: path2, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}); out.Propagated != 1 {t.Errorf("target-engulfs-source: %+v", out)}}func TestPropagate_MissingFlags(t *testing.T) {cases := []struct {name stringin CallsPropagateInput}{{"no file", CallsPropagateInput{FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}},{"no from", CallsPropagateInput{File: "x", ToFilter: fTo, Species: "Kiwi"}},{"no to", CallsPropagateInput{File: "x", FromFilter: fFrom, Species: "Kiwi"}},{"no species", CallsPropagateInput{File: "x", FromFilter: fFrom, ToFilter: fTo}},}for _, c := range cases {t.Run(c.name, func(t *testing.T) {_, err := CallsPropagate(c.in)if err == nil {t.Errorf("expected error")}})}}func TestPropagate_SameFromAndTo(t *testing.T) {_, err := CallsPropagate(CallsPropagateInput{File: "x", FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi",})if err == nil {t.Error("expected error when --from == --to")}}func TestPropagate_NonexistentFile(t *testing.T) {_, err := CallsPropagate(CallsPropagateInput{File: "/nonexistent/path.data", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err == nil {t.Error("expected error for nonexistent file")}}func TestPropagate_RealisticMixed(t *testing.T) {// Mimics the 20260228_211500.WAV.data case: cert=0 "Don't Know" and cert=100 Kiwi sources// coexist; only cert=100 Kiwi gets propagated.path := writeFile(t,// Sources (kiwi-1.2)seg(45, 52.5, lbl(fFrom, "Don't Know", "", 0)),seg(142.5, 177.5, lbl(fFrom, "Kiwi", "Male", 100)),seg(195, 217.5, lbl(fFrom, "Don't Know", "", 0)),seg(647.5, 682.5, lbl(fFrom, "Kiwi", "Female", 100)),seg(815, 855, lbl(fFrom, "Kiwi", "Duet", 100)),// Targets (kiwi-1.5)seg(147.5, 167.5, lbl(fTo, "Kiwi", "Male", 70)),seg(647.5, 672.5, lbl(fTo, "Kiwi", "Female", 70)),seg(815, 852.5, lbl(fTo, "Kiwi", "Duet", 70)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.TargetsExamined != 3 || out.Propagated != 3 || out.SkippedConflict != 0 {t.Fatalf("counts wrong: %+v", out)}df := readFile(t, path)expect := []struct {start, end float64calltype string}{{147.5, 167.5, "Male"},{647.5, 672.5, "Female"},{815, 852.5, "Duet"},}for _, e := range expect {l := findLabel(df, fTo, e.start, e.end)if l == nil || l.Certainty != 90 || l.CallType != e.calltype || l.Species != "Kiwi" {t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", e.start, e.end, l, e.calltype)}}}func TestPropagate_NoWriteIfNothingChanged(t *testing.T) {// File with only non-target segments should not be rewritten 
(reviewer unchanged).path := writeFile(t,seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),)out, err := CallsPropagate(CallsPropagateInput{File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.Propagated != 0 || out.TargetsExamined != 0 {t.Fatalf("expected no activity: %+v", out)}df := readFile(t, path)if df.Meta.Reviewer != "David" {t.Errorf("reviewer should not be touched, got %q", df.Meta.Reviewer)}}// writeFileAt is like writeFile but puts the file inside an existing dir// with a caller-provided basename (must end in .data).func writeFileAt(t *testing.T, dir, base string, segs ...*utils.Segment) string {t.Helper()path := filepath.Join(dir, base)df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600},Segments: segs,}if err := df.Write(path); err != nil {t.Fatalf("write fixture: %v", err)}return path}func TestPropagateFolder_AggregatesAndSkipsMissing(t *testing.T) {dir := t.TempDir()// File A: both filters present, one clean propagation.aPath := writeFileAt(t, dir, "a.wav.data",seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),)// File B: only target filter — missing source, must be skipped silently.bPath := writeFileAt(t, dir, "b.wav.data",seg(200, 225, lbl(fTo, "Kiwi", "Duet", 70)),)// File C: only source filter — missing target, must be skipped silently.writeFileAt(t, dir, "c.wav.data",seg(300, 325, lbl(fFrom, "Kiwi", "Male", 100)),)// File D: both filters, but no overlap → targets examined, none propagated.dPath := writeFileAt(t, dir, "d.wav.data",seg(400, 425, lbl(fFrom, "Kiwi", "Male", 100)),seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),)out, err := CallsPropagateFolder(CallsPropagateFolderInput{Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}assertPropagateStats(t, out, CallsPropagateFolderOutput{FilesTotal: 4,FilesWithBothFilters: 2,FilesSkippedNoFilter: 2,FilesChanged: 1,FilesErrored: 0,TargetsExamined: 2,Propagated: 1,SkippedNoOverlap: 1,})t.Run("file_a_propagated", func(t *testing.T) {aDf := readFile(t, aPath)if aDf.Meta.Reviewer != "Skraak" {t.Errorf("reviewer: got %q, want Skraak", aDf.Meta.Reviewer)}if l := findLabel(aDf, fTo, 100, 125); l == nil || l.Certainty != 90 || l.CallType != "Male" {t.Errorf("target label: got %+v, want cert=90 calltype=Male", l)}})t.Run("file_b_skipped", func(t *testing.T) {bDf := readFile(t, bPath)if bDf.Meta.Reviewer != "David" {t.Errorf("reviewer should not be touched, got %q", bDf.Meta.Reviewer)}})t.Run("file_d_no_overlap", func(t *testing.T) {dDf := readFile(t, dPath)if dDf.Meta.Reviewer != "David" {t.Errorf("reviewer should not be touched, got %q", dDf.Meta.Reviewer)}if l := findLabel(dDf, fTo, 500, 525); l == nil || l.Certainty != 70 {t.Errorf("target label should be unchanged cert=70, got %+v", l)}})}func TestPropagateFolder_EmptyFolder(t *testing.T) {dir := t.TempDir()out, err := CallsPropagateFolder(CallsPropagateFolderInput{Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.FilesTotal != 0 || out.Propagated != 0 {t.Errorf("expected empty result, got %+v", out)}}func TestPropagateFolder_MissingRequiredFlags(t *testing.T) {dir := t.TempDir()cases := []CallsPropagateFolderInput{{Folder: "", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"},{Folder: dir, FromFilter: "", ToFilter: fTo, Species: "Kiwi"},{Folder: dir, FromFilter: fFrom, ToFilter: "", Species: 
"Kiwi"},{Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: ""},{Folder: dir, FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi"},}for i, in := range cases {if _, err := CallsPropagateFolder(in); err == nil {t.Errorf("case %d: expected error for input %+v", i, in)}}}func TestPropagateFolder_NonexistentFolder(t *testing.T) {_, err := CallsPropagateFolder(CallsPropagateFolderInput{Folder: "/nonexistent/path/xyz", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err == nil {t.Fatal("expected error for nonexistent folder")}}func TestPropagateFolder_ConflictsTaggedWithFile(t *testing.T) {dir := t.TempDir()// Two sources with different calltypes both overlapping one target.writeFileAt(t, dir, "conflict.wav.data",seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),seg(110, 130, lbl(fFrom, "Kiwi", "Female", 100)),seg(100, 130, lbl(fTo, "Kiwi", "", 70)),)out, err := CallsPropagateFolder(CallsPropagateFolderInput{Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",})if err != nil {t.Fatalf("unexpected error: %v", err)}if out.SkippedConflict != 1 || len(out.Conflicts) != 1 {t.Fatalf("expected one conflict, got %+v", out)}if out.Conflicts[0].File == "" {t.Errorf("conflict should be tagged with file path, got %+v", out.Conflicts[0])}}}// assertPropagateStats checks output stats against expected values.func assertPropagateStats(t *testing.T, got, want CallsPropagateFolderOutput) {t.Helper()checks := []struct {name stringgot intwant int}{{"FilesTotal", got.FilesTotal, want.FilesTotal},{"FilesWithBothFilters", got.FilesWithBothFilters, want.FilesWithBothFilters},{"FilesSkippedNoFilter", got.FilesSkippedNoFilter, want.FilesSkippedNoFilter},{"FilesChanged", got.FilesChanged, want.FilesChanged},{"FilesErrored", got.FilesErrored, want.FilesErrored},{"TargetsExamined", got.TargetsExamined, want.TargetsExamined},{"Propagated", got.Propagated, want.Propagated},{"SkippedNoOverlap", got.SkippedNoOverlap, want.SkippedNoOverlap},}for _, c := range checks {if c.got != c.want {t.Errorf("%s: got %d, want %d", c.name, c.got, c.want)}}
package toolsimport ("fmt""os""skraak/utils")type CallsPropagateInput struct {File string `json:"file"`FromFilter string `json:"from_filter"`ToFilter string `json:"to_filter"`Species string `json:"species"`}type CallsPropagateOutput struct {File string `json:"file"`FromFilter string `json:"from_filter"`ToFilter string `json:"to_filter"`Species string `json:"species"`FiltersMissing bool `json:"filters_missing,omitempty"`TargetsExamined int `json:"targets_examined"`Propagated int `json:"propagated"`SkippedNoOverlap int `json:"skipped_no_overlap"`SkippedConflict int `json:"skipped_conflict"`Conflicts []PropagateConflict `json:"conflicts,omitempty"`Changes []PropagateChange `json:"changes,omitempty"`Error string `json:"error,omitempty"`}type CallsPropagateFolderInput struct {Folder string `json:"folder"`FromFilter string `json:"from_filter"`ToFilter string `json:"to_filter"`Species string `json:"species"`}type CallsPropagateFolderOutput struct {Folder string `json:"folder"`FromFilter string `json:"from_filter"`ToFilter string `json:"to_filter"`Species string `json:"species"`FilesTotal int `json:"files_total"`FilesWithBothFilters int `json:"files_with_both_filters"`FilesSkippedNoFilter int `json:"files_skipped_no_filter"`FilesChanged int `json:"files_changed"`FilesErrored int `json:"files_errored"`TargetsExamined int `json:"targets_examined"`Propagated int `json:"propagated"`SkippedNoOverlap int `json:"skipped_no_overlap"`SkippedConflict int `json:"skipped_conflict"`Conflicts []PropagateConflict `json:"conflicts,omitempty"`Errors []CallsPropagateOutput `json:"errors,omitempty"`Error string `json:"error,omitempty"`}type PropagateConflict struct {File string `json:"file,omitempty"`TargetStart float64 `json:"target_start"`TargetEnd float64 `json:"target_end"`TargetCallType string `json:"target_calltype,omitempty"`SourceChoices []PropagateSourceChoice `json:"source_choices"`}type PropagateSourceChoice struct {Start float64 `json:"start"`End float64 `json:"end"`Species string `json:"species"`CallType string `json:"calltype,omitempty"`}type PropagateChange struct {TargetStart float64 `json:"target_start"`TargetEnd float64 `json:"target_end"`PrevSpecies string `json:"prev_species"`PrevCallType string `json:"prev_calltype,omitempty"`PrevCertainty int `json:"prev_certainty"`NewSpecies string `json:"new_species"`NewCallType string `json:"new_calltype,omitempty"`NewCertainty int `json:"new_certainty"`}// CallsPropagate copies verified classifications (certainty==100) from one filter's// segments to overlapping target segments of another filter, within a single .data file.// Target labels with certainty==70 (ML-unverified) or certainty==0 (Don't Know / Noise)// are updated — targets at certainty==100 (human-verified) and certainty==90 (already// propagated) are left alone. 
Only source labels matching --species are considered.// Propagated target labels are set to certainty=90 and file reviewer is set to "Skraak".func CallsPropagate(input CallsPropagateInput) (CallsPropagateOutput, error) {output := CallsPropagateOutput{File: input.File,FromFilter: input.FromFilter,ToFilter: input.ToFilter,Species: input.Species,}if err := validatePropagateInput(&output, input); err != nil {return output, err}df, err := utils.ParseDataFile(input.File)if err != nil {output.Error = fmt.Sprintf("parse %s: %v", input.File, err)return output, fmt.Errorf("%s", output.Error)}// Fast path: skip files that don't contain both filters at all.if !hasBothFilters(df, input.FromFilter, input.ToFilter) {output.FiltersMissing = truereturn output, nil}sources := collectPropagateSources(df, input.FromFilter, input.Species)propagateTargets(df, sources, input, &output)if output.Propagated > 0 {df.Meta.Reviewer = "Skraak"if err := df.Write(input.File); err != nil {output.Error = fmt.Sprintf("write %s: %v", input.File, err)return output, fmt.Errorf("%s", output.Error)}}return output, nil}// validatePropagateInput checks required fields and file existencefunc validatePropagateInput(output *CallsPropagateOutput, input CallsPropagateInput) error {checks := []struct {val stringmsg string}{{input.File, "--file is required"},{input.FromFilter, "--from is required"},{input.ToFilter, "--to is required"},{input.Species, "--species is required"},}for _, c := range checks {if c.val == "" {output.Error = c.msgreturn fmt.Errorf("%s", c.msg)}}if input.FromFilter == input.ToFilter {output.Error = "--from and --to must differ"return fmt.Errorf("%s", output.Error)}if _, err := os.Stat(input.File); os.IsNotExist(err) {output.Error = fmt.Sprintf("file not found: %s", input.File)return fmt.Errorf("%s", output.Error)}// hasBothFilters checks whether the data file contains both from and to filtersfunc hasBothFilters(df *utils.DataFile, fromFilter, toFilter string) bool {hasFrom, hasTo := false, falsefor _, seg := range df.Segments {for _, lbl := range seg.Labels {if lbl.Filter == fromFilter {hasFrom = true}if lbl.Filter == toFilter {hasTo = true}if hasFrom && hasTo {return true}}}// sourceRef pairs a segment with its matching source labeltype sourceRef struct {seg *utils.Segmentlabel *utils.Label}// collectPropagateSources gathers verified source labels (certainty==100) for the given filter/speciesfunc collectPropagateSources(df *utils.DataFile, fromFilter, species string) []sourceRef {var sources []sourceReffor _, seg := range df.Segments {for _, lbl := range seg.Labels {if lbl.Filter == fromFilter && lbl.Species == species && lbl.Certainty == 100 {sources = append(sources, sourceRef{seg: seg, label: lbl})break}}}// propagateTargets iterates target segments, finds overlapping sources, and applies agreed classificationsfunc propagateTargets(df *utils.DataFile, sources []sourceRef, input CallsPropagateInput, output *CallsPropagateOutput) {for _, tSeg := range df.Segments {toLabel := findUpdatableTargetLabel(tSeg.Labels, input.ToFilter)if toLabel == nil {continue}output.TargetsExamined++overlaps := findOverlappingSources(sources, tSeg)if len(overlaps) == 0 {output.SkippedNoOverlap++continue}agreedCallType, conflict := resolveCallType(overlaps)if conflict {output.SkippedConflict++output.Conflicts = append(output.Conflicts, buildConflictRecord(tSeg, toLabel, overlaps))continue}applyPropagation(toLabel, input.Species, agreedCallType, tSeg, output)}}// findUpdatableTargetLabel finds a target label with certainty 70 or 0 for 
the given filterfunc findUpdatableTargetLabel(labels []*utils.Label, toFilter string) *utils.Label {for _, lbl := range labels {if lbl.Filter == toFilter && (lbl.Certainty == 70 || lbl.Certainty == 0) {return lbl}}return nil}// findOverlappingSources returns sources whose segments overlap with the target segmentfunc findOverlappingSources(sources []sourceRef, tSeg *utils.Segment) []sourceRef {var overlaps []sourceReffor _, s := range sources {if s.seg.StartTime < tSeg.EndTime && tSeg.StartTime < s.seg.EndTime {overlaps = append(overlaps, s)}// resolveCallType checks if all overlapping sources agree on a call type.// Returns the agreed call type and whether there is a conflict.func resolveCallType(overlaps []sourceRef) (string, bool) {agreedCallType := overlaps[0].label.CallTypefor _, s := range overlaps[1:] {if s.label.CallType != agreedCallType {return "", true}}return agreedCallType, false}// buildConflictRecord creates a PropagateConflict from overlapping disagreeing sourcesfunc buildConflictRecord(tSeg *utils.Segment, toLabel *utils.Label, overlaps []sourceRef) PropagateConflict {choices := make([]PropagateSourceChoice, 0, len(overlaps))for _, s := range overlaps {choices = append(choices, PropagateSourceChoice{Start: s.seg.StartTime,End: s.seg.EndTime,Species: s.label.Species,CallType: s.label.CallType,})}return PropagateConflict{TargetStart: tSeg.StartTime,TargetEnd: tSeg.EndTime,TargetCallType: toLabel.CallType,SourceChoices: choices,}// applyPropagation updates the target label and records the changefunc applyPropagation(toLabel *utils.Label, species, callType string, tSeg *utils.Segment, output *CallsPropagateOutput) {change := PropagateChange{TargetStart: tSeg.StartTime,TargetEnd: tSeg.EndTime,PrevSpecies: toLabel.Species,PrevCallType: toLabel.CallType,PrevCertainty: toLabel.Certainty,NewSpecies: species,NewCallType: callType,NewCertainty: 90,}output.Propagated++output.Changes = append(output.Changes, change)}// CallsPropagateFolder runs CallsPropagate against every .data file in a folder,// aggregating counts. Files that do not contain both --from and --to filters are// skipped silently (counted as files_skipped_no_filter). 
Parse/write errors on// individual files are collected in Errors; they don't abort the run.func CallsPropagateFolder(input CallsPropagateFolderInput) (CallsPropagateFolderOutput, error) {output := CallsPropagateFolderOutput{Folder: input.Folder,FromFilter: input.FromFilter,ToFilter: input.ToFilter,Species: input.Species,}if input.Folder == "" {output.Error = "--folder is required"return output, fmt.Errorf("%s", output.Error)}if input.FromFilter == "" {output.Error = "--from is required"return output, fmt.Errorf("%s", output.Error)}if input.ToFilter == "" {output.Error = "--to is required"return output, fmt.Errorf("%s", output.Error)}if input.Species == "" {output.Error = "--species is required"return output, fmt.Errorf("%s", output.Error)}if input.FromFilter == input.ToFilter {output.Error = "--from and --to must differ"return output, fmt.Errorf("%s", output.Error)}info, err := os.Stat(input.Folder)if err != nil {output.Error = fmt.Sprintf("folder not found: %s", input.Folder)return output, fmt.Errorf("%s", output.Error)}if !info.IsDir() {output.Error = fmt.Sprintf("not a directory: %s", input.Folder)return output, fmt.Errorf("%s", output.Error)}files, err := utils.FindDataFiles(input.Folder)if err != nil {output.Error = fmt.Sprintf("list .data files: %v", err)return output, fmt.Errorf("%s", output.Error)}output.FilesTotal = len(files)for _, f := range files {fileOut, err := CallsPropagate(CallsPropagateInput{File: f,FromFilter: input.FromFilter,ToFilter: input.ToFilter,Species: input.Species,})if err != nil {output.FilesErrored++output.Errors = append(output.Errors, fileOut)continue}if fileOut.FiltersMissing {output.FilesSkippedNoFilter++continue}output.FilesWithBothFilters++output.TargetsExamined += fileOut.TargetsExaminedoutput.Propagated += fileOut.Propagatedoutput.SkippedNoOverlap += fileOut.SkippedNoOverlapoutput.SkippedConflict += fileOut.SkippedConflictif fileOut.Propagated > 0 {output.FilesChanged++}for _, c := range fileOut.Conflicts {c.File = foutput.Conflicts = append(output.Conflicts, c)}}return output, nil}toLabel.Species = speciestoLabel.CallType = callTypetoLabel.Certainty = 90}}return overlaps}return sources}return false}return nil}
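// Usage sketch (illustrative; the folder path is a placeholder). Note that
// overlap is half-open, as implemented in findOverlappingSources: a source
// ending exactly where a target starts does not count
// (s.StartTime < t.EndTime && t.StartTime < s.EndTime).
//
//	out, err := CallsPropagateFolder(CallsPropagateFolderInput{
//		Folder:     "/recordings/2026-02",
//		FromFilter: "opensoundscape-kiwi-1.2",
//		ToFilter:   "opensoundscape-kiwi-1.5",
//		Species:    "Kiwi",
//	})
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Printf("%d propagated, %d conflicts across %d files\n",
//		out.Propagated, out.SkippedConflict, out.FilesChanged)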
package toolsimport ("path/filepath""testing""skraak/utils")func TestCallsModifyBookmark(t *testing.T) {// Create a temp .data file with a bookmarked segmenttmpDir := t.TempDir()tmpFile := filepath.Join(tmpDir, "test.data")df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "test", Duration: 60},Segments: []*utils.Segment{{StartTime: 10.0,EndTime: 15.0,FreqLow: 100,FreqHigh: 5000,Labels: []*utils.Label{{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: true},},},},}if err := df.Write(tmpFile); err != nil {t.Fatalf("failed to write test file: %v", err)}// Test 1: Adding bookmark when already true should do nothingbookmark := trueresult, err := CallsModify(CallsModifyInput{File: tmpFile,Reviewer: "tester",Filter: "myfilter",Segment: "10-15",Certainty: 80,Bookmark: &bookmark,})// Should return error "no changes needed"if err == nil {t.Errorf("expected error 'no changes needed' when bookmark already true, got nil")}if result.Error != "No changes needed: all values already match" {t.Errorf("expected 'no changes needed' error, got: %s", result.Error)}// Verify bookmark is still true in the filedf2, err := utils.ParseDataFile(tmpFile)if err != nil {t.Fatalf("failed to parse file: %v", err)}if !df2.Segments[0].Labels[0].Bookmark {t.Errorf("bookmark should still be true, got false")}}func TestCallsModifyBookmarkFalse(t *testing.T) {// Create a temp .data file WITHOUT a bookmarktmpDir := t.TempDir()tmpFile := filepath.Join(tmpDir, "test.data")df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "test", Duration: 60},Segments: []*utils.Segment{{StartTime: 10.0,EndTime: 15.0,FreqLow: 100,FreqHigh: 5000,Labels: []*utils.Label{{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: false},},},},}if err := df.Write(tmpFile); err != nil {t.Fatalf("failed to write test file: %v", err)}// Test: Adding bookmark when false should set it to truebookmark := trueresult, err := CallsModify(CallsModifyInput{File: tmpFile,Reviewer: "tester",Filter: "myfilter",Segment: "10-15",Certainty: 80,Bookmark: &bookmark,})if err != nil {t.Errorf("unexpected error: %v", err)}if result.Bookmark == nil || !*result.Bookmark {t.Errorf("expected bookmark=true in result, got %v", result.Bookmark)}// Verify bookmark is true in the filedf2, err := utils.ParseDataFile(tmpFile)if err != nil {t.Fatalf("failed to parse file: %v", err)}if !df2.Segments[0].Labels[0].Bookmark {t.Errorf("bookmark should be true, got false")}}func TestCallsModifyCommentAdditive(t *testing.T) {// Create a temp .data file with an existing commenttmpDir := t.TempDir()tmpFile := filepath.Join(tmpDir, "test.data")df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "test", Duration: 60},Segments: []*utils.Segment{{StartTime: 10.0,EndTime: 15.0,FreqLow: 100,FreqHigh: 5000,Labels: []*utils.Label{{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: "First observation"},},},},}if err := df.Write(tmpFile); err != nil {t.Fatalf("failed to write test file: %v", err)}// Test: Adding comment should be additiveresult, err := CallsModify(CallsModifyInput{File: tmpFile,Reviewer: "tester",Filter: "myfilter",Segment: "10-15",Certainty: 80,Comment: "Good example",})if err != nil {t.Errorf("unexpected error: %v", err)}expectedComment := "First observation | Good example"if result.Comment != expectedComment {t.Errorf("expected comment=%q, got %q", expectedComment, result.Comment)}// Verify comment in filedf2, err := utils.ParseDataFile(tmpFile)if err != nil {t.Fatalf("failed to parse file: %v", err)}if 
df2.Segments[0].Labels[0].Comment != expectedComment {t.Errorf("expected comment in file=%q, got %q", expectedComment, df2.Segments[0].Labels[0].Comment)}}func TestCallsModifyCommentAdditiveMultiple(t *testing.T) {// Create a temp .data file and add multiple commentstmpDir := t.TempDir()tmpFile := filepath.Join(tmpDir, "test.data")df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "test", Duration: 60},Segments: []*utils.Segment{{StartTime: 10.0,EndTime: 15.0,FreqLow: 100,FreqHigh: 5000,Labels: []*utils.Label{{Species: "Kiwi", Certainty: 80, Filter: "myfilter"},},},},}if err := df.Write(tmpFile); err != nil {t.Fatalf("failed to write test file: %v", err)}// Add first comment_, err := CallsModify(CallsModifyInput{File: tmpFile,Reviewer: "tester",Filter: "myfilter",Segment: "10-15",Certainty: 80,Comment: "First",})if err != nil {t.Fatalf("unexpected error on first comment: %v", err)}// Add second comment_, err = CallsModify(CallsModifyInput{File: tmpFile,Reviewer: "tester",Filter: "myfilter",Segment: "10-15",Certainty: 80,Comment: "Second",})if err != nil {t.Fatalf("unexpected error on second comment: %v", err)}// Add third commentresult, err := CallsModify(CallsModifyInput{File: tmpFile,Reviewer: "tester",Filter: "myfilter",Segment: "10-15",Certainty: 80,Comment: "Third",})if err != nil {t.Fatalf("unexpected error on third comment: %v", err)}expectedComment := "First | Second | Third"if result.Comment != expectedComment {t.Errorf("expected comment=%q, got %q", expectedComment, result.Comment)}}func TestCallsModifyCommentTooLong(t *testing.T) {// Create a temp .data file with an existing long commenttmpDir := t.TempDir()tmpFile := filepath.Join(tmpDir, "test.data")existingComment := "This is a fairly long existing comment that takes up space"df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "test", Duration: 60},Segments: []*utils.Segment{{StartTime: 10.0,EndTime: 15.0,FreqLow: 100,FreqHigh: 5000,Labels: []*utils.Label{{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: existingComment},},},},}if err := df.Write(tmpFile); err != nil {t.Fatalf("failed to write test file: %v", err)}// Test: Adding a long comment that would exceed 140 chars should faillongNewComment := "This is another very long comment that when combined with the existing one will exceed the limit"result, err := CallsModify(CallsModifyInput{File: tmpFile,Reviewer: "tester",Filter: "myfilter",Segment: "10-15",Certainty: 80,Comment: longNewComment,})if err == nil {t.Errorf("expected error for combined comment exceeding 140 chars, got nil")}if result.Error == "" {t.Errorf("expected error message, got empty")}// Verify original comment is preserveddf2, err := utils.ParseDataFile(tmpFile)if err != nil {t.Fatalf("failed to parse file: %v", err)}if df2.Segments[0].Labels[0].Comment != existingComment {t.Errorf("original comment should be preserved, got %q", df2.Segments[0].Labels[0].Comment)}}func TestCallsModifyPreservesBookmarkOnOtherChange(t *testing.T) {// Create a temp .data file with a bookmarktmpDir := t.TempDir()tmpFile := filepath.Join(tmpDir, "test.data")df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "test", Duration: 60},Segments: []*utils.Segment{{StartTime: 10.0,EndTime: 15.0,FreqLow: 100,FreqHigh: 5000,Labels: []*utils.Label{{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Bookmark: true},},},},}if err := df.Write(tmpFile); err != nil {t.Fatalf("failed to write test file: %v", err)}// Change certainty (without passing --bookmark) - bookmark should be preservedresult, err := 
CallsModify(CallsModifyInput{File: tmpFile,Reviewer: "tester",Filter: "myfilter",Segment: "10-15",Certainty: 100,// No Bookmark set})if err != nil {t.Errorf("unexpected error: %v", err)}if result.Bookmark != nil {t.Errorf("bookmark should not be in output when not changed, got %v", result.Bookmark)}// Verify bookmark is still true in the filedf2, err := utils.ParseDataFile(tmpFile)if err != nil {t.Fatalf("failed to parse file: %v", err)}if !df2.Segments[0].Labels[0].Bookmark {t.Errorf("bookmark should still be true after changing certainty, got false")}}func TestCallsModifyInvalidSegment(t *testing.T) {tmpDir := t.TempDir()tmpFile := filepath.Join(tmpDir, "test.data")df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "test", Duration: 60},Segments: []*utils.Segment{{StartTime: 10.0,EndTime: 15.0,FreqLow: 100,FreqHigh: 5000,Labels: []*utils.Label{{Species: "Kiwi", Certainty: 80, Filter: "myfilter"},},},},}if err := df.Write(tmpFile); err != nil {t.Fatalf("failed to write test file: %v", err)}// Test: Non-existent segment should errorresult, err := CallsModify(CallsModifyInput{File: tmpFile,Reviewer: "tester",Filter: "myfilter",Segment: "99-100",Certainty: 80,})if err == nil {t.Errorf("expected error for non-existent segment, got nil")}if result.Error == "" {t.Errorf("expected error message, got empty")}}
package tools

import (
	"fmt"
	"math"
	"os"
	"strings"

	"skraak/utils"
)

// CallsModifyInput defines the input for the modify tool
type CallsModifyInput struct {
	File      string `json:"file"`
	Reviewer  string `json:"reviewer"`
	Filter    string `json:"filter"`
	Segment   string `json:"segment"`
	Certainty int    `json:"certainty"`
	Species   string `json:"species"`
	Bookmark  *bool  `json:"bookmark"`
	Comment   string `json:"comment"`
}

// CallsModifyOutput defines the output for the modify tool
type CallsModifyOutput struct {
	File          string `json:"file"`
	SegmentStart  int    `json:"segment_start"`
	SegmentEnd    int    `json:"segment_end"`
	Species       string `json:"species,omitempty"`
	CallType      string `json:"calltype,omitempty"`
	Certainty     int    `json:"certainty,omitempty"`
	Bookmark      *bool  `json:"bookmark,omitempty"`
	Comment       string `json:"comment,omitempty"`
	PreviousValue string `json:"previous_value,omitempty"`
	Error         string `json:"error,omitempty"`
}

// validateModifyInput checks required fields and comment constraints.
func validateModifyInput(input CallsModifyInput) error {
	if input.File == "" {
		return fmt.Errorf("--file is required")
	}
	if input.Reviewer == "" {
		return fmt.Errorf("--reviewer is required")
	}
	if input.Filter == "" {
		return fmt.Errorf("--filter is required")
	}
	if input.Segment == "" {
		return fmt.Errorf("--segment is required")
	}
	if len(input.Comment) > 140 {
		return fmt.Errorf("--comment must be 140 characters or less")
	}
	for i, r := range input.Comment {
		if r > 127 {
			return fmt.Errorf("--comment must be ASCII only (non-ASCII at position %d)", i)
		}
	}
	return nil
}

// resolveSpecies parses species+calltype from the input species string.
// If the input species is empty, keeps the existing label values.
func resolveSpecies(inputSpecies string, label *utils.Label) (species, callType string) {
	if inputSpecies == "" {
		return label.Species, label.CallType
	}
	if before, after, ok := strings.Cut(inputSpecies, "+"); ok {
		return before, after
	}
	return inputSpecies, ""
}

// hasModifyChanges checks whether any field would actually change.
func hasModifyChanges(newSpecies, newCallType string, input CallsModifyInput, label *utils.Label) bool {
	if newSpecies != label.Species || newCallType != label.CallType {
		return true
	}
	if input.Certainty != label.Certainty {
		return true
	}
	if input.Bookmark != nil && *input.Bookmark != label.Bookmark {
		return true
	}
	if input.Comment != "" {
		return true
	}
	return false
}

// applyLabelChanges updates the label and data file, populating the output.
func applyLabelChanges(label *utils.Label, dataFile *utils.DataFile, input CallsModifyInput, newSpecies, newCallType string, output *CallsModifyOutput) error {
	dataFile.Meta.Reviewer = input.Reviewer
	label.Species = newSpecies
	label.CallType = newCallType
	output.Species = newSpecies
	output.CallType = newCallType
	label.Certainty = input.Certainty
	output.Certainty = input.Certainty
	if input.Bookmark != nil && *input.Bookmark != label.Bookmark {
		label.Bookmark = *input.Bookmark
		output.Bookmark = input.Bookmark
	}
	if input.Comment != "" {
		var newComment string
		if label.Comment != "" {
			newComment = label.Comment + " | " + input.Comment
		} else {
			newComment = input.Comment
		}
		if len(newComment) > 140 {
			return fmt.Errorf("combined comment exceeds 140 characters (%d)", len(newComment))
		}
		label.Comment = newComment
		output.Comment = newComment
	}
	return nil
}

// CallsModify modifies a label in a .data file
func CallsModify(input CallsModifyInput) (CallsModifyOutput, error) {
	var output CallsModifyOutput
	if err := validateModifyInput(input); err != nil {
		output.Error = err.Error()
		return output, err
	}
	startTime, endTime, err := parseSegmentRange(input.Segment)
	if err != nil {
		output.Error = err.Error()
		return output, err
	}
	output.File = input.File
	output.SegmentStart = startTime
	output.SegmentEnd = endTime
	if _, err := os.Stat(input.File); os.IsNotExist(err) {
		output.Error = fmt.Sprintf("File not found: %s", input.File)
		return output, fmt.Errorf("%s", output.Error)
	}
	dataFile, err := utils.ParseDataFile(input.File)
	if err != nil {
		output.Error = fmt.Sprintf("Failed to parse file: %v", err)
		return output, fmt.Errorf("%s", output.Error)
	}
	segment := findSegment(dataFile.Segments, startTime, endTime, input.Filter)
	if segment == nil {
		output.Error = fmt.Sprintf("No segment found matching time range %d-%d", startTime, endTime)
		return output, fmt.Errorf("%s", output.Error)
	}
	targetLabel := findLabelByFilter(segment, input.Filter)
	if targetLabel == nil {
		output.Error = fmt.Sprintf("No label found with filter '%s' in segment %d-%d", input.Filter, startTime, endTime)
		return output, fmt.Errorf("%s", output.Error)
	}
	output.PreviousValue = formatLabel(targetLabel)
	newSpecies, newCallType := resolveSpecies(input.Species, targetLabel)
	if !hasModifyChanges(newSpecies, newCallType, input, targetLabel) {
		output.Error = "No changes needed: all values already match"
		return output, fmt.Errorf("%s", output.Error)
	}
	if err := applyLabelChanges(targetLabel, dataFile, input, newSpecies, newCallType, &output); err != nil {
		output.Error = err.Error()
		return output, err
	}
	if err := dataFile.Write(input.File); err != nil {
		output.Error = fmt.Sprintf("Failed to save file: %v", err)
		return output, fmt.Errorf("%s", output.Error)
	}
	return output, nil
}

// parseSegmentRange parses "12-15" format into start and end integers
func parseSegmentRange(s string) (int, int, error) {
	parts := strings.Split(s, "-")
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("invalid segment format: %s (expected start-end, e.g., 12-15)", s)
	}
	var start, end int
	if _, err := fmt.Sscanf(parts[0], "%d", &start); err != nil {
		return 0, 0, fmt.Errorf("invalid start time: %s", parts[0])
	}
	if _, err := fmt.Sscanf(parts[1], "%d", &end); err != nil {
		return 0, 0, fmt.Errorf("invalid end time: %s", parts[1])
	}
	if start < 0 || end < 0 {
		return 0, 0, fmt.Errorf("times must be non-negative")
	}
	if start >= end {
		return 0, 0, fmt.Errorf("start time must be less than end time")
	}
	return start, end, nil
}

// findSegment finds a segment matching the time range using floor/ceil matching.
// It also checks that the segment contains a label with the specified filter,
// so that duplicate segments (same time range, different filters) are resolved correctly.
func findSegment(segments []*utils.Segment, startTime, endTime int, filter string) *utils.Segment {
	for _, seg := range segments {
		segStart := int(math.Floor(seg.StartTime))
		segEnd := int(math.Ceil(seg.EndTime))
		if segEnd == segStart {
			segEnd = segStart + 1 // minimum 1 second
		}
		if segStart == startTime && segEnd == endTime {
			for _, label := range seg.Labels {
				if label.Filter == filter {
					return seg
				}
			}
		}
	}
	return nil
}

// findLabelByFilter finds the first label matching the given filter in a segment.
func findLabelByFilter(segment *utils.Segment, filter string) *utils.Label {
	for _, label := range segment.Labels {
		if label.Filter == filter {
			return label
		}
	}
	return nil
}

// formatLabel formats a label for display
func formatLabel(label *utils.Label) string {
	result := label.Species
	if label.CallType != "" {
		result += "+" + label.CallType
	}
	result += fmt.Sprintf(" (%d%%)", label.Certainty)
	return result
}
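// Usage sketch for CallsModify (file path and filter name below are
// illustrative, not fixed values from this package):
//
//	on := true
//	out, err := CallsModify(CallsModifyInput{
//		File:      "20230610_150000.wav.data", // hypothetical .data file
//		Reviewer:  "tester",
//		Filter:    "myfilter", // must match an existing label's filter
//		Segment:   "10-15",    // floor(start)-ceil(end) of the segment
//		Certainty: 100,
//		Bookmark:  &on,
//	})
//	// out.PreviousValue records the label as it was before the change.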
package tools

import (
	"bufio"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"skraak/utils"
)

// CallsFromRavenInput defines the input for the calls-from-raven tool
type CallsFromRavenInput struct {
	Folder          string          `json:"folder"`
	File            string          `json:"file"`
	Delete          bool            `json:"delete"`
	ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
}

// CallsFromRavenOutput defines the output for the calls-from-raven tool
type CallsFromRavenOutput struct {
	Calls            []ClusteredCall `json:"calls"`
	TotalCalls       int             `json:"total_calls"`
	SpeciesCount     map[string]int  `json:"species_count"`
	DataFilesWritten int             `json:"data_files_written"`
	DataFilesSkipped int             `json:"data_files_skipped"`
	FilesProcessed   int             `json:"files_processed"`
	FilesDeleted     int             `json:"files_deleted"`
	Filter           string          `json:"filter"`
	Error            *string         `json:"error,omitempty"`
}

// ravenSource implements CallSource for Raven selection files
type ravenSource struct{}

func (ravenSource) Name() string { return "Raven" }

func (ravenSource) FindFiles(folder string) ([]string, error) {
	var files []string
	entries, err := os.ReadDir(folder)
	if err != nil {
		return nil, err
	}
	for _, entry := range entries {
		name := entry.Name()
		if strings.HasSuffix(name, ".selections.txt") {
			files = append(files, filepath.Join(folder, name))
		}
	}
	return files, nil
}

func (ravenSource) ProcessFile(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	return processRavenFileCached(ravenFile, cache)
}

// CallsFromRaven processes Raven selection files and writes .data files
func CallsFromRaven(input CallsFromRavenInput) (CallsFromRavenOutput, error) {
	src := ravenSource{}
	commonInput := CallsFromSourceInput(input)
	commonOutput, err := callsFromSource(src, commonInput)
	// Convert to Raven-specific output type
	var output CallsFromRavenOutput
	output.Calls = commonOutput.Calls
	output.TotalCalls = commonOutput.TotalCalls
	output.SpeciesCount = commonOutput.SpeciesCount
	output.DataFilesWritten = commonOutput.DataFilesWritten
	output.DataFilesSkipped = commonOutput.DataFilesSkipped
	output.FilesProcessed = commonOutput.FilesProcessed
	output.FilesDeleted = commonOutput.FilesDeleted
	output.Filter = commonOutput.Filter
	output.Error = commonOutput.Error
	return output, err
}

// RavenSelection represents a single Raven selection
type RavenSelection struct {
	StartTime float64
	EndTime   float64
	FreqLow   float64
	FreqHigh  float64
	Species   string
}

// ravenColumnIndices holds the column index positions for a Raven file
type ravenColumnIndices struct {
	beginTimeIdx int
	endTimeIdx   int
	lowFreqIdx   int
	highFreqIdx  int
	speciesIdx   int
}

// parseRavenHeader finds column indices from a tab-separated header line
func parseRavenHeader(header []string) (ravenColumnIndices, error) {
	idx := ravenColumnIndices{beginTimeIdx: -1, endTimeIdx: -1, lowFreqIdx: -1, highFreqIdx: -1, speciesIdx: -1}
	for i, col := range header {
		switch col {
		case "Begin Time (s)":
			idx.beginTimeIdx = i
		case "End Time (s)":
			idx.endTimeIdx = i
		case "Low Freq (Hz)":
			idx.lowFreqIdx = i
		case "High Freq (Hz)":
			idx.highFreqIdx = i
		case "Species":
			idx.speciesIdx = i
		}
	}
	if idx.beginTimeIdx == -1 || idx.endTimeIdx == -1 || idx.speciesIdx == -1 {
		return idx, fmt.Errorf("missing required columns in Raven file")
	}
	return idx, nil
}

// parseRavenSelections reads all selection rows from a scanner and returns parsed selections
func parseRavenSelections(scanner *bufio.Scanner, idx ravenColumnIndices) ([]RavenSelection, error) {
	var selections []RavenSelection
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			continue
		}
		fields := strings.Split(line, "\t")
		if len(fields) <= idx.speciesIdx {
			continue
		}
		sel, err := parseRavenRow(fields, idx)
		if err != nil {
			return nil, err
		}
		selections = append(selections, sel)
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading file: %w", err)
	}
	return selections, nil
}

// parseRavenRow parses a single tab-separated row into a RavenSelection
func parseRavenRow(fields []string, idx ravenColumnIndices) (RavenSelection, error) {
	var sel RavenSelection
	startTime, err := strconv.ParseFloat(fields[idx.beginTimeIdx], 64)
	if err != nil {
		return sel, fmt.Errorf("failed to parse begin time %q: %w", fields[idx.beginTimeIdx], err)
	}
	sel.StartTime = startTime
	endTime, err := strconv.ParseFloat(fields[idx.endTimeIdx], 64)
	if err != nil {
		return sel, fmt.Errorf("failed to parse end time %q: %w", fields[idx.endTimeIdx], err)
	}
	sel.EndTime = endTime
	if idx.lowFreqIdx >= 0 && idx.lowFreqIdx < len(fields) {
		freqLow, err := strconv.ParseFloat(fields[idx.lowFreqIdx], 64)
		if err != nil {
			return sel, fmt.Errorf("failed to parse low freq %q: %w", fields[idx.lowFreqIdx], err)
		}
		sel.FreqLow = freqLow
	}
	if idx.highFreqIdx >= 0 && idx.highFreqIdx < len(fields) {
		freqHigh, err := strconv.ParseFloat(fields[idx.highFreqIdx], 64)
		if err != nil {
			return sel, fmt.Errorf("failed to parse high freq %q: %w", fields[idx.highFreqIdx], err)
		}
		sel.FreqHigh = freqHigh
	}
	sel.Species = fields[idx.speciesIdx]
	return sel, nil
}

// deriveWAVBaseName extracts the base WAV filename from a Raven .selections.txt filename
func deriveWAVBaseName(ravenFile string) string {
	base := filepath.Base(ravenFile)
	nameWithoutSuffix := strings.TrimSuffix(base, ".selections.txt")
	idx := strings.Index(nameWithoutSuffix, ".Table.")
	if idx > 0 {
		nameWithoutSuffix = nameWithoutSuffix[:idx]
	}
	return nameWithoutSuffix
}

// resolveWAVPath finds the WAV file corresponding to a Raven file
func resolveWAVPath(ravenFile string, cache *DirCache) string {
	baseName := deriveWAVBaseName(ravenFile)
	if cache != nil {
		return cache.FindWAV(baseName)
	}
	return findWAVFile(filepath.Dir(ravenFile), baseName)
}

// processRavenFileCached processes a single Raven selection file using a DirCache for WAV lookup
func processRavenFileCached(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	file, err := os.Open(ravenFile)
	if err != nil {
		return nil, false, false, fmt.Errorf("failed to open file: %w", err)
	}
	defer func() { _ = file.Close() }()
	scanner := bufio.NewScanner(file)
	if !scanner.Scan() {
		return nil, false, false, fmt.Errorf("empty file")
	}
	header := strings.Split(scanner.Text(), "\t")
	idx, err := parseRavenHeader(header)
	if err != nil {
		return nil, false, false, err
	}
	selections, err := parseRavenSelections(scanner, idx)
	if err != nil {
		return nil, false, false, err
	}
	if len(selections) == 0 {
		return nil, false, true, nil
	}
	// Find WAV file
	wavPath := resolveWAVPath(ravenFile, cache)
	if wavPath == "" {
		return nil, false, true, nil
	}
	sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
	if err != nil {
		return nil, false, true, nil
	}
	dataPath := wavPath + ".data"
	segments := buildRavenSegments(selections, sampleRate)
	meta := AviaNZMeta{Operator: "Raven", Duration: duration}
	reviewer := "None"
	meta.Reviewer = &reviewer
	if err := writeDotDataFileSafe(dataPath, segments, "Raven", meta); err != nil {
		return nil, false, false, err
	}
	var calls []ClusteredCall
	for _, sel := range selections {
		calls = append(calls, ClusteredCall{
			File:      wavPath,
			StartTime: sel.StartTime,
			EndTime:   sel.EndTime,
			EbirdCode: sel.Species,
			Segments:  1,
		})
	}
	return calls, true, false, nil
}

// buildRavenSegments converts Raven selections to AviaNZ segments
func buildRavenSegments(selections []RavenSelection, sampleRate int) []AviaNZSegment {
	var segments []AviaNZSegment
	for _, sel := range selections {
		labels := []AviaNZLabel{{
			Species:   sel.Species,
			Certainty: 70, // Default certainty for Raven (no confidence metric)
			Filter:    "Raven",
		}}
		// Use frequency range from Raven, or full band if not specified
		freqLow := sel.FreqLow
		freqHigh := sel.FreqHigh
		if freqLow == 0 && freqHigh == 0 {
			freqHigh = float64(sampleRate)
		}
		segments = append(segments, AviaNZSegment{sel.StartTime, sel.EndTime, freqLow, freqHigh, labels})
	}
	return segments
}
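// Name-matching sketch: deriveWAVBaseName strips the ".selections.txt" suffix
// and any ".Table.N" infix, so (illustrative filenames):
//
//	deriveWAVBaseName("20230610_150000.Table.1.selections.txt") // "20230610_150000"
//	deriveWAVBaseName("20230610_150000.selections.txt")         // "20230610_150000"
//
// The resulting basename is then resolved case-insensitively against the
// directory via DirCache.FindWAV or findWAVFile.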
package tools

import (
	"os"
	"path/filepath"
	"testing"

	"skraak/utils"
)

func TestCallsFromPreds_EmptyFilterError(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "preds.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}
	// Create a dummy WAV file (minimal valid WAV)
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)
	// Test with empty filter (should error)
	input := CallsFromPredsInput{CSVPath: csvPath, Filter: "", WriteDotData: true, ProgressHandler: nil}
	output, err := CallsFromPreds(input)
	// Should return an error
	if err == nil {
		t.Error("expected error for empty filter, got nil")
	}
	if output.Error == nil || *output.Error == "" {
		t.Error("expected error message in output, got empty")
	}
}

func TestCallsFromPreds_NewDataFile(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_test-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}
	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)
	// Test with the filter parsed from the filename
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename
		WriteDotData:    true,
		ProgressHandler: nil,
	}
	output, err := CallsFromPreds(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "test-filter" {
		t.Errorf("expected filter 'test-filter', got '%s'", output.Filter)
	}
	// Verify the .data file was created
	dataPath := wavPath + ".data"
	if _, err := os.Stat(dataPath); os.IsNotExist(err) {
		t.Error("expected .data file to be created")
	}
	// Verify content
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 1 {
		t.Errorf("expected 1 segment, got %d", len(df.Segments))
	}
	if len(df.Segments[0].Labels) != 1 {
		t.Errorf("expected 1 label, got %d", len(df.Segments[0].Labels))
	}
	if df.Segments[0].Labels[0].Filter != "test-filter" {
		t.Errorf("expected filter 'test-filter', got '%s'", df.Segments[0].Labels[0].Filter)
	}
}

func TestCallsFromPreds_ExistingDataFileSameFilter(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_existing-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}
	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)
	// Create an existing .data file with the same filter
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},[5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "existing-filter"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}
	// Test with the same filter (should error)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename -> "existing-filter"
		WriteDotData:    true,
		ProgressHandler: nil,
	}
	output, err := CallsFromPreds(input)
	// Should return an error
	if err == nil {
		t.Error("expected error for same filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
	// Verify the original .data file is unchanged
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 1 {
		t.Errorf("expected original 1 segment, got %d", len(df.Segments))
	}
	if df.Segments[0].Labels[0].Species != "morepork" {
		t.Errorf("expected original species 'morepork', got '%s'", df.Segments[0].Labels[0].Species)
	}
}

func TestCallsFromPreds_ExistingDataFileDifferentFilter(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_new-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}
	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)
	// Create an existing .data file with a different filter
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},[5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "old-filter"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}
	// Test with a different filter (should merge)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename -> "new-filter"
		WriteDotData:    true,
		ProgressHandler: nil,
	}
	output, err := CallsFromPreds(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	// Verify the .data file has the merged content
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 2 {
		t.Errorf("expected 2 segments after merge, got %d", len(df.Segments))
	}
	// Check segments are sorted by start time
	if df.Segments[0].StartTime > df.Segments[1].StartTime {
		t.Error("expected segments to be sorted by start time")
	}
	// Check both filters are present
	filters := make(map[string]bool)
	for _, seg := range df.Segments {
		for _, label := range seg.Labels {
			filters[label.Filter] = true
		}
	}
	if !filters["old-filter"] {
		t.Error("expected 'old-filter' to be present")
	}
	if !filters["new-filter"] {
		t.Error("expected 'new-filter' to be present")
	}
}

func TestCallsFromPreds_ExistingDataFileParseError(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_test-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}
	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)
	// Create a corrupted .data file
	dataPath := wavPath + ".data"
	corruptedData := `this is not valid json`
	if err := os.WriteFile(dataPath, []byte(corruptedData), 0644); err != nil {
		t.Fatal(err)
	}
	// Test (should error due to parse failure)
	input := CallsFromPredsInput{CSVPath: csvPath, Filter: "", WriteDotData: true, ProgressHandler: nil}
	output, err := CallsFromPreds(input)
	// Should return an error
	if err == nil {
		t.Error("expected error for corrupted .data file, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
	// Verify the original file is unchanged
	content, err := os.ReadFile(dataPath)
	if err != nil {
		t.Fatal(err)
	}
	if string(content) != corruptedData {
		t.Error("expected corrupted file to remain unchanged")
	}
}

func TestCallsFromPreds_ExplicitFilter(t *testing.T) {
	// Create a temp CSV file with a non-standard name
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predictions.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}
	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)
	// Test with an explicit filter
	input := CallsFromPredsInput{CSVPath: csvPath, Filter: "my-custom-filter", WriteDotData: true, ProgressHandler: nil}
	output, err := CallsFromPreds(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.Filter != "my-custom-filter" {
		t.Errorf("expected filter 'my-custom-filter', got '%s'", output.Filter)
	}
	// Verify the .data file uses the explicit filter
	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if df.Segments[0].Labels[0].Filter != "my-custom-filter" {
		t.Errorf("expected filter 'my-custom-filter' in .data file, got '%s'", df.Segments[0].Labels[0].Filter)
	}
}

func TestCallsFromPreds_NonParsableFilenameNoFilter(t *testing.T) {
	// Create a temp CSV file with a non-standard name that can't be parsed
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "random_name.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}
	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)
	// Test with no filter and a non-parsable filename (should error)
	input := CallsFromPredsInput{CSVPath: csvPath, Filter: "", WriteDotData: true, ProgressHandler: nil}
	output, err := CallsFromPreds(input)
	// Should return an error
	if err == nil {
		t.Error("expected error for unparsable filename with no filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
}

// createMinimalWAV creates a minimal valid WAV file for testing
func createMinimalWAV(t *testing.T, path string, sampleRate int, duration float64) {
	t.Helper()
	numSamples := int(float64(sampleRate) * duration)
	dataSize := numSamples * 2 // 16-bit mono
	// WAV header (44 bytes)
	header := make([]byte, 44)
	// RIFF header
	copy(header[0:4], "RIFF")
	totalSize := uint32(36 + dataSize)
	header[4] = byte(totalSize)
	header[5] = byte(totalSize >> 8)
	header[6] = byte(totalSize >> 16)
	header[7] = byte(totalSize >> 24)
	copy(header[8:12], "WAVE")
	// fmt chunk
	copy(header[12:16], "fmt ")
	chunkSize := uint32(16)
	header[16] = byte(chunkSize)
	header[17] = byte(chunkSize >> 8)
	header[18] = byte(chunkSize >> 16)
	header[19] = byte(chunkSize >> 24)
	audioFormat := uint16(1) // PCM
	header[20] = byte(audioFormat)
	header[21] = byte(audioFormat >> 8)
	numChannels := uint16(1)
	header[22] = byte(numChannels)
	header[23] = byte(numChannels >> 8)
	header[24] = byte(sampleRate)
	header[25] = byte(sampleRate >> 8)
	header[26] = byte(sampleRate >> 16)
	header[27] = byte(sampleRate >> 24)
	byteRate := uint32(sampleRate * 2)
	header[28] = byte(byteRate)
	header[29] = byte(byteRate >> 8)
	header[30] = byte(byteRate >> 16)
	header[31] = byte(byteRate >> 24)
	blockAlign := uint16(2)
	header[32] = byte(blockAlign)
	header[33] = byte(blockAlign >> 8)
	bitsPerSample := uint16(16)
	header[34] = byte(bitsPerSample)
	header[35] = byte(bitsPerSample >> 8)
	// data chunk
	copy(header[36:40], "data")
	header[40] = byte(dataSize)
	header[41] = byte(dataSize >> 8)
	header[42] = byte(dataSize >> 16)
	header[43] = byte(dataSize >> 24)
	// Create the file with header and silence
	file, err := os.Create(path)
	if err != nil {
		t.Fatal(err)
	}
	defer file.Close()
	if _, err := file.Write(header); err != nil {
		t.Fatal(err)
	}
	// Write silence (zeros)
	silence := make([]byte, dataSize)
	if _, err := file.Write(silence); err != nil {
		t.Fatal(err)
	}
}
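// Byte layout written by createMinimalWAV above (canonical 44-byte PCM WAV
// header; all multi-byte fields are little-endian):
//
//	0-3   "RIFF"            22-23 channels (1)
//	4-7   file size - 8     24-27 sample rate
//	8-11  "WAVE"            28-31 byte rate (sampleRate*2)
//	12-15 "fmt "            32-33 block align (2)
//	16-19 fmt size (16)     34-35 bits per sample (16)
//	20-21 format (1 = PCM)  36-39 "data", 40-43 data size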
package tools

import (
	"encoding/csv"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"

	"skraak/utils"
)

// Constants for clustering algorithm
const (
	CLUSTER_GAP_MULTIPLIER     = 2  // Gap threshold = CLUSTER_GAP_MULTIPLIER * clip_duration (use 3 for kiwi)
	MIN_DETECTIONS_PER_CLUSTER = 0  // 1 = filter out single detections (used for kiwi, whose calls run ~30 s); 0 = let single detections pass through
	DEFAULT_CERTAINTY          = 70 // .data certainty:70
	DOT_DATA_WORKERS           = 8  // Number of parallel workers for .data file writing
)

// ignoredColumns lists header columns that are never treated as ebird code columns.
var ignoredColumns = map[string]bool{"NotKiwi": true, "0.0": true}

// ClusteredCall represents a clustered bird call detection
type ClusteredCall struct {
	File      string  `json:"file"`
	StartTime float64 `json:"start_time"`
	EndTime   float64 `json:"end_time"`
	EbirdCode string  `json:"ebird_code"`
	Segments  int     `json:"segments"`
}

// CallsFromPredsInput defines the input for the calls-from-preds tool
type CallsFromPredsInput struct {
	CSVPath         string          `json:"csv_path"`
	Filter          string          `json:"filter"`
	WriteDotData    bool            `json:"write_dot_data"`
	GapMultiplier   int             `json:"gap_multiplier"`
	MinDetections   int             `json:"min_detections"`
	ProgressHandler ProgressHandler `json:"-"` // Optional progress callback (not serialized)
}

// ProgressHandler is a callback function for reporting progress during long operations.
// processed: number of items processed so far
// total: total number of items to process
// message: optional status message
type ProgressHandler func(processed, total int, message string)

// CallsFromPredsOutput defines the output for the calls-from-preds tool
type CallsFromPredsOutput struct {
	Calls            []ClusteredCall `json:"calls"`
	TotalCalls       int             `json:"total_calls"`
	ClipDuration     float64         `json:"clip_duration"`
	GapThreshold     float64         `json:"gap_threshold"`
	SpeciesCount     map[string]int  `json:"species_count"`
	DataFilesWritten int             `json:"data_files_written"`
	DataFilesSkipped int             `json:"data_files_skipped"`
	Filter           string          `json:"filter"`
	Error            *string         `json:"error,omitempty"`
}

// predFileSpeciesKey groups detections by file and ebird code
type predFileSpeciesKey struct {
	File      string
	EbirdCode string
}

// CallsFromPreds reads a predictions CSV and clusters detections into continuous bird calls
func CallsFromPreds(input CallsFromPredsInput) (CallsFromPredsOutput, error) {
	var output CallsFromPredsOutput
	// Determine filter: use provided filter, or parse from CSV filename
	filter := input.Filter
	if filter == "" {
		filter = ParseFilterFromFilename(input.CSVPath)
	}
	if filter == "" {
		errMsg := "Filter must be specified via --filter flag or parsable from CSV filename"
		output.Error = &errMsg
		return output, fmt.Errorf("%s", errMsg)
	}
	output.Filter = filter
	_, detections, clipDuration, err := readPredCSV(input.CSVPath)
	if err != nil {
		errMsg := err.Error()
		output.Error = &errMsg
		return output, err
	}
	output.ClipDuration = clipDuration
	gapMultiplier := CLUSTER_GAP_MULTIPLIER
	if input.GapMultiplier > 0 {
		gapMultiplier = input.GapMultiplier
	}
	minDetections := MIN_DETECTIONS_PER_CLUSTER
	if input.MinDetections >= 0 {
		minDetections = input.MinDetections
	}
	gapThreshold := float64(gapMultiplier) * clipDuration
	output.GapThreshold = gapThreshold
	allCalls, speciesCount := clusterDetections(detections, clipDuration, gapThreshold, minDetections)
	output.Calls = allCalls
	output.TotalCalls = len(allCalls)
	output.SpeciesCount = speciesCount
	if input.WriteDotData {
		dataFilesWritten, dataFilesSkipped, err := writeDotFiles(input.CSVPath, filter, allCalls, input.ProgressHandler)
		if err != nil {
			errMsg := fmt.Sprintf("Error writing .data files: %v", err)
			output.Error = &errMsg
			return output, fmt.Errorf("%s", errMsg)
		}
		output.DataFilesWritten = dataFilesWritten
		output.DataFilesSkipped = dataFilesSkipped
	}
	return output, nil
}

// readPredCSV opens and reads a predictions CSV, returning column mappings, detections, and clip duration
func readPredCSV(csvPath string) (predCSVColumns, map[predFileSpeciesKey][]float64, float64, error) {
	file, err := os.Open(csvPath)
	if err != nil {
		return predCSVColumns{}, nil, 0, fmt.Errorf("failed to open CSV file: %w", err)
	}
	defer func() { _ = file.Close() }()
	reader := csv.NewReader(file)
	reader.ReuseRecord = true
	header, err := reader.Read()
	if err != nil {
		return predCSVColumns{}, nil, 0, fmt.Errorf("failed to read CSV header: %w", err)
	}
	cols, err := findPredCSVColumns(header)
	if err != nil {
		return predCSVColumns{}, nil, 0, err
	}
	detections, clipDuration, err := readPredCSVRows(reader, cols)
	if err != nil {
		return predCSVColumns{}, nil, 0, err
	}
	return cols, detections, clipDuration, nil
}

// predCSVColumns holds the column indices for a predictions CSV
type predCSVColumns struct {
	fileIdx      int
	startTimeIdx int
	endTimeIdx   int
	ebirdCodes   []string
	ebirdIdx     []int
}

// findPredCSVColumns parses the CSV header to find column indices
func findPredCSVColumns(header []string) (predCSVColumns, error) {
	cols := predCSVColumns{fileIdx: -1, startTimeIdx: -1, endTimeIdx: -1}
	for i, col := range header {
		switch col {
		case "file":
			cols.fileIdx = i
		case "start_time":
			cols.startTimeIdx = i
		case "end_time":
			cols.endTimeIdx = i
		default:
			if ignoredColumns[col] {
				continue
			}
			cols.ebirdCodes = append(cols.ebirdCodes, col)
			cols.ebirdIdx = append(cols.ebirdIdx, i)
		}
	}
	if cols.fileIdx == -1 || cols.startTimeIdx == -1 || cols.endTimeIdx == -1 {
		return cols, fmt.Errorf("CSV must have 'file', 'start_time', and 'end_time' columns")
	}
	if len(cols.ebirdCodes) == 0 {
		return cols, fmt.Errorf("CSV must have at least one ebird code column")
	}
	return cols, nil
}

// readPredCSVRows reads all CSV data rows and returns detections grouped by file+species, plus clip duration
func readPredCSVRows(reader *csv.Reader, cols predCSVColumns) (map[predFileSpeciesKey][]float64, float64, error) {
	detections := make(map[predFileSpeciesKey][]float64)
	clipDuration := 0.0
	record, err := reader.Read()
	if err == io.EOF {
		return detections, 0, nil
	}
	if err != nil {
		return nil, 0, fmt.Errorf("failed to read first CSV row: %w", err)
	}
	startTime, _ := strconv.ParseFloat(record[cols.startTimeIdx], 64)
	endTime, _ := strconv.ParseFloat(record[cols.endTimeIdx], 64)
	clipDuration = endTime - startTime
	addDetectionsFromRow(record, cols, startTime, detections)
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, 0, fmt.Errorf("failed to read CSV row: %w", err)
		}
		startTime, _ = strconv.ParseFloat(record[cols.startTimeIdx], 64)
		addDetectionsFromRow(record, cols, startTime, detections)
	}
	return detections, clipDuration, nil
}

// addDetectionsFromRow adds positive detections from a single CSV row
func addDetectionsFromRow(record []string, cols predCSVColumns, startTime float64, detections map[predFileSpeciesKey][]float64) {
	fileName := record[cols.fileIdx]
	for i, idx := range cols.ebirdIdx {
		if record[idx] == "1" {
			key := predFileSpeciesKey{File: fileName, EbirdCode: cols.ebirdCodes[i]}
			detections[key] = append(detections[key], startTime)
		}
	}
}

// clusterDetections groups detections into clusters and produces sorted ClusteredCalls
func clusterDetections(detections map[predFileSpeciesKey][]float64, clipDuration, gapThreshold float64, minDetections int) ([]ClusteredCall, map[string]int) {
	var allCalls []ClusteredCall
	speciesCount := make(map[string]int)
	for key, startTimes := range detections {
		sort.Float64s(startTimes)
		clusters := clusterStartTimes(startTimes, gapThreshold)
		for _, cluster := range clusters {
			if len(cluster) <= minDetections {
				continue
			}
			allCalls = append(allCalls, ClusteredCall{
				File:      key.File,
				StartTime: cluster[0],
				EndTime:   cluster[len(cluster)-1] + clipDuration,
				EbirdCode: key.EbirdCode,
				Segments:  len(cluster),
			})
			speciesCount[key.EbirdCode]++
		}
	}
	sort.Slice(allCalls, func(i, j int) bool {
		if allCalls[i].File != allCalls[j].File {
			return allCalls[i].File < allCalls[j].File
		}
		return allCalls[i].StartTime < allCalls[j].StartTime
	})
	return allCalls, speciesCount
}

// DirCache caches directory entries for fast WAV file lookup.
// Scans the directory once and builds a map from lowercased basename to full filename.
// Safe for concurrent read-only use after construction.
type DirCache struct {
	dir    string
	wavMap map[string]string // lowercase basename -> filename with original case (e.g. "20230610_150000" -> "20230610_150000.WAV")
	dirMap map[string]string // lowercase basename -> filename for any file (used by from-raven for .selections.txt etc.)
}

// NewDirCache creates a DirCache by scanning the directory once.
func NewDirCache(dir string) *DirCache {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return &DirCache{dir: dir, wavMap: make(map[string]string), dirMap: make(map[string]string)}
	}
	wavMap := make(map[string]string, len(entries))
	dirMap := make(map[string]string, len(entries))
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		ext := filepath.Ext(name)
		base := strings.TrimSuffix(name, ext)
		dirMap[strings.ToLower(base)] = name
		if strings.EqualFold(ext, ".wav") {
			wavMap[strings.ToLower(base)] = name
		}
	}
	return &DirCache{dir: dir, wavMap: wavMap, dirMap: dirMap}
}

// FindWAV looks up a WAV file by basename (case-insensitive).
// Returns the full path with correct case, or empty string if not found.
func (dc *DirCache) FindWAV(baseName string) string {
	if name, ok := dc.wavMap[strings.ToLower(baseName)]; ok {
		return filepath.Join(dc.dir, name)
	}
	return ""
}

// FindFile looks up any file by basename (case-insensitive).
// Returns the full path with correct case, or empty string if not found.
func (dc *DirCache) FindFile(baseName string) string {
	if name, ok := dc.dirMap[strings.ToLower(baseName)]; ok {
		return filepath.Join(dc.dir, name)
	}
	return ""
}

// findWAVFile finds a WAV file in the directory with case-insensitive matching.
// baseName is the filename without extension (e.g., "20230610_150000").
// Returns the full path with correct case, or empty string if not found.
//
// Deprecated: Use DirCache.FindWAV for batch operations to avoid repeated directory scans.
func findWAVFile(dir, baseName string) string {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return ""
	}
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		ext := filepath.Ext(name)
		nameNoExt := strings.TrimSuffix(name, ext)
		if nameNoExt == baseName && strings.EqualFold(ext, ".wav") {
			return filepath.Join(dir, name)
		}
	}
	return ""
}

// writeDotFiles writes AviaNZ .data files for each audio file with calls.
// Uses parallel workers for improved performance on large batches.
func writeDotFiles(csvPath, filter string, calls []ClusteredCall, progress ProgressHandler) (int, int, error) {
	// Base directory is the directory containing the CSV file
	csvDir := filepath.Dir(csvPath)
	// Group calls by file (using extracted filename)
	callsByFile := make(map[string][]ClusteredCall)
	for _, call := range calls {
		filename := filepath.Base(call.File)
		callsByFile[filename] = append(callsByFile[filename], call)
	}
	// Report initial progress
	if progress != nil {
		progress(0, len(callsByFile), "Processing WAV files")
	}
	// If small batch, process sequentially (avoid goroutine overhead)
	if len(callsByFile) < 10 {
		return writeDotFilesSequential(csvDir, filter, callsByFile, progress)
	}
	// Parallel processing for larger batches
	return writeDotFilesParallel(csvDir, filter, callsByFile, progress)
}

// dotDataJob represents a single file to process
type dotDataJob struct {
	filename  string
	fileCalls []ClusteredCall
}

// dotDataResult represents the result of processing a single file
type dotDataResult struct {
	filename string
	written  bool
	err      error
}

// writeDotFilesSequential processes files one at a time (for small batches)
func writeDotFilesSequential(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
	dataFilesWritten := 0
	dataFilesSkipped := 0
	total := len(callsByFile)
	processed := 0
	for filename, fileCalls := range callsByFile {
		// Find WAV file with correct case
		baseName := strings.TrimSuffix(filename, filepath.Ext(filename))
		wavPath := findWAVFile(csvDir, baseName)
		if wavPath == "" {
			dataFilesSkipped++
			processed++
			if progress != nil {
				progress(processed, total, "")
			}
			continue
		}
		dataPath := wavPath + ".data"
		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
		if err != nil {
			dataFilesSkipped++
			processed++
			if progress != nil {
				progress(processed, total, "")
			}
			continue
		}
		// Build segments and metadata
		meta, segments := buildAviaNZMetaAndSegments(fileCalls, filter, duration, sampleRate)
		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
			return dataFilesWritten, dataFilesSkipped, fmt.Errorf("failed to write %s: %w", dataPath, err)
		}
		dataFilesWritten++
		processed++
		if progress != nil {
			progress(processed, total, "")
		}
	}
	return dataFilesWritten, dataFilesSkipped, nil
}

// writeDotFilesParallel processes files concurrently using a worker pool
func writeDotFilesParallel(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
	total := len(callsByFile)
	var processed atomic.Int32
	// Create job and result channels
	jobs := make(chan dotDataJob, len(callsByFile))
	results := make(chan dotDataResult, len(callsByFile))
	// Start workers
	var wg sync.WaitGroup
	for range DOT_DATA_WORKERS {
		wg.Add(1)
		go dotDataWorker(csvDir, filter, jobs, results, &wg)
	}
	// Send jobs
	for filename, fileCalls := range callsByFile {
		jobs <- dotDataJob{filename: filename, fileCalls: fileCalls}
	}
	close(jobs)
	// Wait for workers to finish
	go func() {
		wg.Wait()
		close(results)
	}()
	// Collect results with progress reporting
	dataFilesWritten := 0
	dataFilesSkipped := 0
	var firstErr error
	for result := range results {
		if result.err != nil && firstErr == nil {
			firstErr = result.err
		}
		if result.written {
			dataFilesWritten++
		} else {
			dataFilesSkipped++
		}
		// Report progress
		if progress != nil {
			current := int(processed.Add(1))
			progress(current, total, "")
		}
	}
	return dataFilesWritten, dataFilesSkipped, firstErr
}

// dotDataWorker processes files from the jobs channel
func dotDataWorker(csvDir, filter string, jobs <-chan dotDataJob, results chan<- dotDataResult, wg *sync.WaitGroup) {
	defer wg.Done()
	for job := range jobs {
		// Find WAV file with correct case
		baseName := strings.TrimSuffix(job.filename, filepath.Ext(job.filename))
		wavPath := findWAVFile(csvDir, baseName)
		if wavPath == "" {
			results <- dotDataResult{filename: job.filename, written: false, err: nil}
			continue
		}
		dataPath := wavPath + ".data"
		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
		if err != nil {
			results <- dotDataResult{filename: job.filename, written: false, err: nil}
			continue
		}
		// Build segments and metadata
		meta, segments := buildAviaNZMetaAndSegments(job.fileCalls, filter, duration, sampleRate)
		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
			results <- dotDataResult{filename: job.filename, written: false, err: fmt.Errorf("failed to write %s: %w", dataPath, err)}
			continue
		}
		results <- dotDataResult{filename: job.filename, written: true, err: nil}
	}
}

// buildAviaNZMetaAndSegments creates metadata and segments for a .data file
func buildAviaNZMetaAndSegments(calls []ClusteredCall, filter string, duration float64, sampleRate int) (AviaNZMeta, []AviaNZSegment) {
	// Create metadata
	reviewer := "None"
	meta := AviaNZMeta{Operator: "Auto", Reviewer: &reviewer, Duration: duration}
	// Build segments array
	var segments []AviaNZSegment
	for _, call := range calls {
		// Create labels for this segment
		labels := []AviaNZLabel{{Species: call.EbirdCode, Certainty: DEFAULT_CERTAINTY, Filter: filter}}
		// Create segment: [start, end, freq_low, freq_high, labels]
		// freq_low=0, freq_high=sampleRate for full-band segments
		segments = append(segments, AviaNZSegment{call.StartTime, call.EndTime, 0, sampleRate, labels})
	}
	return meta, segments
}

// writeAviaNZDataFile writes a new .data file to disk (does not check for existing files)
func writeAviaNZDataFile(path string, data []any) error {
	file, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}
	defer func() { _ = file.Close() }()
	encoder := json.NewEncoder(file)
	encoder.SetIndent("", "") // No indentation for compact output
	if err := encoder.Encode(data); err != nil {
		return fmt.Errorf("failed to encode JSON: %w", err)
	}
	return nil
}

// writeDotDataFileSafe safely writes or merges .data files:
//   - if the file doesn't exist: write a new file
//   - if the file exists with the same filter: return an error (refuse to clobber)
//   - if the file exists with a different filter: merge segments and write
//   - if the file exists but can't be parsed: return an error (refuse to clobber)
func writeDotDataFileSafe(path string, newSegments []AviaNZSegment, filter string, meta AviaNZMeta) error {
	// Check if file exists
	if _, err := os.Stat(path); err == nil {
		// File exists - parse and check
		existing, err := utils.ParseDataFile(path)
		if err != nil {
			return fmt.Errorf("cannot parse existing %s: %w (refusing to clobber)", path, err)
		}
		// Check for duplicate filter
		for _, seg := range existing.Segments {
			if seg.HasFilterLabel(filter) {
				return fmt.Errorf("%s already contains filter '%s' (refusing to clobber)", path, filter)
			}
		}
		// Append new segments (different filter - safe to merge)
		for _, newSeg := range newSegments {
			existing.Segments = append(existing.Segments, convertAviaNZSegment(newSeg, filter))
		}
		// Sort by start time
		sort.Slice(existing.Segments, func(i, j int) bool {
			return existing.Segments[i].StartTime < existing.Segments[j].StartTime
		})
		return existing.Write(path)
	}
	// File doesn't exist - write new
	data := buildDataFileFromSegments(meta, newSegments)
	return writeAviaNZDataFile(path, data)
}

// convertAviaNZSegment converts an AviaNZSegment to utils.Segment
func convertAviaNZSegment(seg AviaNZSegment, filter string) *utils.Segment {
	labels := seg[4].([]AviaNZLabel)
	utilsLabels := make([]*utils.Label, len(labels))
	for i, l := range labels {
		utilsLabels[i] = &utils.Label{Species: l.Species, Certainty: l.Certainty, Filter: filter}
	}
	// Handle freq values (could be int or float64 depending on how they were created)
	var freqLow, freqHigh float64
	switch v := seg[2].(type) {
	case int:
		freqLow = float64(v)
	case float64:
		freqLow = v
	}
	switch v := seg[3].(type) {
	case int:
		freqHigh = float64(v)
	case float64:
		freqHigh = v
	}
	return &utils.Segment{
		StartTime: seg[0].(float64),
		EndTime:   seg[1].(float64),
		FreqLow:   freqLow,
		FreqHigh:  freqHigh,
		Labels:    utilsLabels,
	}
}

// buildDataFileFromSegments builds the data file structure from meta and segments
func buildDataFileFromSegments(meta AviaNZMeta, segments []AviaNZSegment) []any {
	result := make([]any, 0, 1+len(segments))
	result = append(result, meta)
	for _, seg := range segments {
		result = append(result, seg)
	}
	return result
}

// ParseFilterFromFilename extracts the filter name from a preds CSV filename:
// "predsST_opensoundscape-kiwi-1.2_2025-11-12.csv" -> "opensoundscape-kiwi-1.2".
// Returns an empty string if parsing fails.
func ParseFilterFromFilename(csvPath string) string {
	filename := filepath.Base(csvPath)
	// Remove .csv extension
	name := strings.TrimSuffix(filename, ".csv")
	// Split on underscore
	parts := strings.Split(name, "_")
	if len(parts) == 3 {
		return parts[1]
	}
	return ""
}

// clusterStartTimes groups consecutive start times into clusters
// where the gap between consecutive times is <= gapThreshold.
func clusterStartTimes(startTimes []float64, gapThreshold float64) [][]float64 {
	if len(startTimes) == 0 {
		return nil
	}
	var clusters [][]float64
	currentCluster := []float64{startTimes[0]}
	for i := 1; i < len(startTimes); i++ {
		gap := startTimes[i] - startTimes[i-1]
		if gap <= gapThreshold {
			// Same cluster
			currentCluster = append(currentCluster, startTimes[i])
		} else {
			// New cluster
			clusters = append(clusters, currentCluster)
			currentCluster = []float64{startTimes[i]}
		}
	}
	// Don't forget the last cluster
	clusters = append(clusters, currentCluster)
	return clusters
}
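// Worked example for clusterStartTimes (input values are illustrative): with
// a 3 s clip duration and the default CLUSTER_GAP_MULTIPLIER of 2, the gap
// threshold is 6 s, so
//
//	clusterStartTimes([]float64{0, 3, 6, 30, 33}, 6.0)
//	// -> [[0 3 6] [30 33]]
//
// and clusterDetections turns the first cluster into one ClusteredCall
// spanning 0-9 s (last start time + clip duration).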
package tools

import (
	"os"
	"path/filepath"
	"testing"

	"skraak/utils"
)

// ============================================
// BirdNET Tests
// ============================================

func TestCallsFromBirda_NewDataFile(t *testing.T) {
	tmpDir := t.TempDir()
	// Create a minimal WAV file
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	// Create a BirdNET results file
	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Turdus migratorius,American Robin,0.85,/some/path/test.WAV\n"
	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
		t.Fatal(err)
	}
	input := CallsFromBirdaInput{File: birdaPath}
	output, err := CallsFromBirda(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "BirdNET" {
		t.Errorf("expected filter 'BirdNET', got '%s'", output.Filter)
	}
	if output.TotalCalls != 1 {
		t.Errorf("expected 1 call, got %d", output.TotalCalls)
	}
	// Verify the .data file was created
	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 1 {
		t.Errorf("expected 1 segment, got %d", len(df.Segments))
	}
	if df.Segments[0].Labels[0].Filter != "BirdNET" {
		t.Errorf("expected filter 'BirdNET', got '%s'", df.Segments[0].Labels[0].Filter)
	}
	if df.Segments[0].Labels[0].Certainty != 85 {
		t.Errorf("expected certainty 85, got %d", df.Segments[0].Labels[0].Certainty)
	}
}

func TestCallsFromBirda_ExistingSameFilter(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing Bird", "certainty": 90, "filter": "BirdNET"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}
	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,New Bird,New Bird,0.85,test.WAV\n"
	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
		t.Fatal(err)
	}
	input := CallsFromBirdaInput{File: birdaPath}
	output, err := CallsFromBirda(input)
	if err == nil {
		t.Error("expected error for same filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
}

func TestCallsFromBirda_ExistingDifferentFilter(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "Manual"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}
	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n"
	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
		t.Fatal(err)
	}
	input := CallsFromBirdaInput{File: birdaPath}
	output, err := CallsFromBirda(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 2 {
		t.Errorf("expected 2 segments after merge, got %d", len(df.Segments))
	}
}

func TestCallsFromBirda_DeleteOption(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n"
	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
		t.Fatal(err)
	}
	input := CallsFromBirdaInput{File: birdaPath, Delete: true}
	output, err := CallsFromBirda(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.FilesDeleted != 1 {
		t.Errorf("expected 1 file deleted, got %d", output.FilesDeleted)
	}
	if _, err := os.Stat(birdaPath); !os.IsNotExist(err) {
		t.Error("expected BirdNET file to be deleted")
	}
}

func TestCallsFromBirda_FolderMode(t *testing.T) {
	tmpDir := t.TempDir()
	for i := range 2 {
		wavPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".WAV")
		createMinimalWAV(t, wavPath, 16000, 60.0)
		birdaPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".BirdNET.results.csv")
		birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Bird,Bird,0.85,test.WAV\n"
		if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
			t.Fatal(err)
		}
	}
	input := CallsFromBirdaInput{Folder: tmpDir}
	output, err := CallsFromBirda(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.FilesProcessed != 2 {
		t.Errorf("expected 2 files processed, got %d", output.FilesProcessed)
	}
	if output.DataFilesWritten != 2 {
		t.Errorf("expected 2 data files written, got %d", output.DataFilesWritten)
	}
}

// ============================================
// Raven Tests
// ============================================

func TestCallsFromRaven_NewDataFile(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}
	input := CallsFromRavenInput{File: ravenPath}
	output, err := CallsFromRaven(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "Raven" {
		t.Errorf("expected filter 'Raven', got '%s'", output.Filter)
	}
	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if df.Segments[0].FreqLow != 1000 {
		t.Errorf("expected freq_low 1000, got %f", df.Segments[0].FreqLow)
	}
	if df.Segments[0].FreqHigh != 5000 {
		t.Errorf("expected freq_high 5000, got %f", df.Segments[0].FreqHigh)
	}
}

func TestCallsFromRaven_ExistingSameFilter(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing", "certainty": 90, "filter": "Raven"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}
	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tNew\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}
	input := CallsFromRavenInput{File: ravenPath}
	output, err := CallsFromRaven(input)
	if err == nil {
		t.Error("expected error for same filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
}

func TestCallsFromRaven_ExistingDifferentFilter(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "BirdNET"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}
	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tMorepork\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}
	input := CallsFromRavenInput{File: ravenPath}
	output, err := CallsFromRaven(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 2 {
		t.Errorf("expected 2 segments after merge, got %d", len(df.Segments))
	}
}

func TestCallsFromRaven_DeleteOption(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}
	input := CallsFromRavenInput{File: ravenPath, Delete: true}
	output, err := CallsFromRaven(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.FilesDeleted != 1 {
		t.Errorf("expected 1 file deleted, got %d", output.FilesDeleted)
	}
	if _, err := os.Stat(ravenPath); !os.IsNotExist(err) {
		t.Error("expected Raven file to be deleted")
	}
}

func TestCallsFromRaven_MultipleSelections(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n2\tSpectrogram 1\t1\t10.0\t15.0\t2000\t6000\tMorepork\n3\tSpectrogram 1\t1\t20.0\t25.0\t1500\t4500\tTui\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}
	input := CallsFromRavenInput{File: ravenPath}
	output, err := CallsFromRaven(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.TotalCalls != 3 {
		t.Errorf("expected 3 calls, got %d", output.TotalCalls)
	}
	if output.SpeciesCount["Kiwi"] != 1 || output.SpeciesCount["Morepork"] != 1 || output.SpeciesCount["Tui"] != 1 {
		t.Errorf("unexpected species count: %v", output.SpeciesCount)
	}
}
package tools

import (
    "encoding/csv"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "strconv"
    "strings"

    "skraak/utils"
)

// CallsFromBirdaInput defines the input for the calls-from-birda tool
type CallsFromBirdaInput struct {
    Folder          string          `json:"folder"`
    File            string          `json:"file"`
    Delete          bool            `json:"delete"`
    ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
}

// CallsFromBirdaOutput defines the output for the calls-from-birda tool
type CallsFromBirdaOutput struct {
    Calls            []ClusteredCall `json:"calls"`
    TotalCalls       int             `json:"total_calls"`
    SpeciesCount     map[string]int  `json:"species_count"`
    DataFilesWritten int             `json:"data_files_written"`
    DataFilesSkipped int             `json:"data_files_skipped"`
    FilesProcessed   int             `json:"files_processed"`
    FilesDeleted     int             `json:"files_deleted"`
    Filter           string          `json:"filter"`
    Error            *string         `json:"error,omitempty"`
}

// birdaSource implements CallSource for BirdNET results files
type birdaSource struct{}

func (birdaSource) Name() string { return "BirdNET" }

func (birdaSource) FindFiles(folder string) ([]string, error) {
    var files []string
    entries, err := os.ReadDir(folder)
    if err != nil {
        return nil, err
    }
    for _, entry := range entries {
        name := entry.Name()
        if strings.HasSuffix(name, ".BirdNET.results.csv") {
            files = append(files, filepath.Join(folder, name))
        }
    }
    return files, nil
}

func (birdaSource) ProcessFile(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
    return processBirdaFileCached(birdaFile, cache)
}

// CallsFromBirda processes BirdNET results files and writes .data files
func CallsFromBirda(input CallsFromBirdaInput) (CallsFromBirdaOutput, error) {
    src := birdaSource{}
    commonInput := CallsFromSourceInput(input)
    commonOutput, err := callsFromSource(src, commonInput)

    // Convert to Birda-specific output type
    var output CallsFromBirdaOutput
    output.Calls = commonOutput.Calls
    output.TotalCalls = commonOutput.TotalCalls
    output.SpeciesCount = commonOutput.SpeciesCount
    output.DataFilesWritten = commonOutput.DataFilesWritten
    output.DataFilesSkipped = commonOutput.DataFilesSkipped
    output.FilesProcessed = commonOutput.FilesProcessed
    output.FilesDeleted = commonOutput.FilesDeleted
    output.Filter = commonOutput.Filter
    output.Error = commonOutput.Error
    return output, err
}

// BirdNETDetection represents a single BirdNET detection
type BirdNETDetection struct {
    StartTime      float64
    EndTime        float64
    ScientificName string
    CommonName     string
    Confidence     float64
    WAVPath        string
}

// birdaColumnIndices holds the parsed column positions from a BirdNET CSV header.
type birdaColumnIndices struct {
    startIdx      int
    endIdx        int
    commonNameIdx int
    confidenceIdx int
    fileIdx       int
}

// parseBirdaCSVHeader reads the CSV header row and returns column indices.
func parseBirdaCSVHeader(reader *csv.Reader) (birdaColumnIndices, error) {
    header, err := reader.Read()
    if err != nil {
        return birdaColumnIndices{}, fmt.Errorf("failed to read header: %w", err)
    }
    idx := birdaColumnIndices{startIdx: -1, endIdx: -1, commonNameIdx: -1, confidenceIdx: -1, fileIdx: -1}
    for i, col := range header {
        col = strings.TrimPrefix(col, "\ufeff")
        switch col {
        case "Start (s)":
            idx.startIdx = i
        case "End (s)":
            idx.endIdx = i
        case "Common name":
            idx.commonNameIdx = i
        case "Confidence":
            idx.confidenceIdx = i
        case "File":
            idx.fileIdx = i
        }
    }
    if idx.startIdx == -1 || idx.endIdx == -1 || idx.commonNameIdx == -1 || idx.confidenceIdx == -1 {
        return birdaColumnIndices{}, fmt.Errorf("missing required columns in BirdNET file")
    }
    return idx, nil
}

// readBirdaDetections reads all detection records from a BirdNET CSV.
func readBirdaDetections(reader *csv.Reader, idx birdaColumnIndices) ([]BirdNETDetection, error) {
    var detections []BirdNETDetection
    for {
        record, err := reader.Read()
        if err == io.EOF {
            break
        }
        if err != nil {
            return nil, fmt.Errorf("failed to read record: %w", err)
        }
        var det BirdNETDetection
        startTime, perr := strconv.ParseFloat(record[idx.startIdx], 64)
        if perr != nil {
            return nil, fmt.Errorf("failed to parse start time %q: %w", record[idx.startIdx], perr)
        }
        det.StartTime = startTime
        endTime, perr := strconv.ParseFloat(record[idx.endIdx], 64)
        if perr != nil {
            return nil, fmt.Errorf("failed to parse end time %q: %w", record[idx.endIdx], perr)
        }
        det.EndTime = endTime
        det.CommonName = record[idx.commonNameIdx]
        confidence, perr := strconv.ParseFloat(record[idx.confidenceIdx], 64)
        if perr != nil {
            return nil, fmt.Errorf("failed to parse confidence %q: %w", record[idx.confidenceIdx], perr)
        }
        det.Confidence = confidence
        if idx.fileIdx >= 0 && idx.fileIdx < len(record) {
            det.WAVPath = record[idx.fileIdx]
        }
        detections = append(detections, det)
    }
    return detections, nil
}

// resolveBirdaWAVPath finds the WAV file associated with a BirdNET results file.
func resolveBirdaWAVPath(birdaFile string, firstWAVPath string, cache *DirCache) string {
    if firstWAVPath != "" {
        if _, err := os.Stat(firstWAVPath); err == nil {
            return firstWAVPath
        }
    }
    dir := filepath.Dir(birdaFile)
    base := filepath.Base(birdaFile)
    baseName := strings.TrimSuffix(base, ".BirdNET.results.csv")
    if cache != nil {
        return cache.FindWAV(baseName)
    }
    return findWAVFile(dir, baseName)
}

// processBirdaFileCached processes a single BirdNET results file using a DirCache for WAV lookup
func processBirdaFileCached(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
    file, err := os.Open(birdaFile)
    if err != nil {
        return nil, false, false, fmt.Errorf("failed to open file: %w", err)
    }
    defer func() { _ = file.Close() }()

    reader := csv.NewReader(file)
    idx, err := parseBirdaCSVHeader(reader)
    if err != nil {
        return nil, false, false, err
    }
    detections, err := readBirdaDetections(reader, idx)
    if err != nil {
        return nil, false, false, err
    }
    if len(detections) == 0 {
        return nil, false, true, nil
    }

    wavPath := resolveBirdaWAVPath(birdaFile, detections[0].WAVPath, cache)
    if wavPath == "" {
        return nil, false, true, nil
    }
    sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
    if err != nil {
        return nil, false, true, nil
    }

    dataPath := wavPath + ".data"
    segments := buildBirdNETSegments(detections, sampleRate)
    meta := AviaNZMeta{Operator: "BirdNET", Duration: duration}
    reviewer := "None"
    meta.Reviewer = &reviewer
    if err := writeDotDataFileSafe(dataPath, segments, "BirdNET", meta); err != nil {
        return nil, false, false, err
    }

    var calls []ClusteredCall
    for _, det := range detections {
        calls = append(calls, ClusteredCall{
            File:      wavPath,
            StartTime: det.StartTime,
            EndTime:   det.EndTime,
            EbirdCode: det.CommonName,
            Segments:  1,
        })
    }
    return calls, true, false, nil
}

// buildBirdNETSegments converts BirdNET detections to AviaNZ segments
func buildBirdNETSegments(detections []BirdNETDetection, sampleRate int) []AviaNZSegment {
    var segments []AviaNZSegment
    for _, det := range detections {
        // Convert confidence (0.0-1.0) to certainty (0-100)
        certainty := min(max(int(det.Confidence*100), 0), 100)
        labels := []AviaNZLabel{{
            Species:   det.CommonName,
            Certainty: certainty,
            Filter:    "BirdNET",
        }}
        segment := AviaNZSegment{
            det.StartTime,
            det.EndTime,
            0,          // freq_low
            sampleRate, // freq_high (full band)
            labels,
        }
        segments = append(segments, segment)
    }
    return segments
}
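// Illustrative usage sketch (not part of the tool itself): running
// CallsFromBirda over a folder of BirdNET results and printing the tally.
// The folder path is a placeholder; the input and output fields are the
// ones defined above.
//
//	out, err := CallsFromBirda(CallsFromBirdaInput{
//		Folder: "/recordings/2021-10", // hypothetical path
//	})
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Printf("wrote %d .data files, skipped %d\n", out.DataFilesWritten, out.DataFilesSkipped)
//	for species, n := range out.SpeciesCount {
//		fmt.Printf("  %s: %d\n", species, n)
//	}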
package tools

import (
    "os"
    "path/filepath"
    "testing"
)

func TestDetectAnomalies_LabelMismatch(t *testing.T) {
    dir := t.TempDir()
    // Same time range, different calltypes across two models
    data := `[{"Operator":"test"},` +
        `[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
        `{"species":"Kiwi","calltype":"Male","certainty":100,"filter":"model-b"}]]]`
    if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
        t.Fatal(err)
    }
    out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
    if err != nil {
        t.Fatal(err)
    }
    if out.LabelMismatches != 1 {
        t.Errorf("expected 1 label mismatch, got %d", out.LabelMismatches)
    }
    if out.CertaintyMismatches != 0 {
        t.Errorf("expected 0 certainty mismatches, got %d", out.CertaintyMismatches)
    }
    if out.Anomalies[0].Type != "label_mismatch" {
        t.Errorf("expected label_mismatch, got %s", out.Anomalies[0].Type)
    }
}

func TestDetectAnomalies_CertaintyMismatch(t *testing.T) {
    dir := t.TempDir()
    // Same time range, same labels, different certainty
    data := `[{"Operator":"test"},` +
        `[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":90,"filter":"model-a"},` +
        `{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
    if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
        t.Fatal(err)
    }
    out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
    if err != nil {
        t.Fatal(err)
    }
    if out.CertaintyMismatches != 1 {
        t.Errorf("expected 1 certainty mismatch, got %d", out.CertaintyMismatches)
    }
    if out.LabelMismatches != 0 {
        t.Errorf("expected 0 label mismatches, got %d", out.LabelMismatches)
    }
}

func TestDetectAnomalies_NoAnomalyWhenAgreement(t *testing.T) {
    dir := t.TempDir()
    data := `[{"Operator":"test"},` +
        `[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
        `{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
    if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
        t.Fatal(err)
    }
    out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
    if err != nil {
        t.Fatal(err)
    }
    if out.AnomaliesTotal != 0 {
        t.Errorf("expected 0 anomalies, got %d", out.AnomaliesTotal)
    }
}

func TestDetectAnomalies_LonelySegmentSkipped(t *testing.T) {
    dir := t.TempDir()
    // model-a has a segment, model-b has no segment in this file
    data := `[{"Operator":"test"},` +
        `[0,10,100,1000,[{"species":"Kiwi","certainty":100,"filter":"model-a"}]]]`
    if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
        t.Fatal(err)
    }
    out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
    if err != nil {
        t.Fatal(err)
    }
    if out.AnomaliesTotal != 0 {
        t.Errorf("lonely segment should be skipped, got %d anomalies", out.AnomaliesTotal)
    }
    if out.FilesWithAllModels != 0 {
        t.Error("file missing a model should not count as FilesWithAllModels")
    }
}

func TestDetectAnomalies_FailsWithOneModel(t *testing.T) {
    dir := t.TempDir()
    _, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a"}})
    if err == nil {
        t.Error("expected error with only 1 model")
    }
}
package tools

import (
    "fmt"
    "os"
    "path/filepath"

    "skraak/utils"
)

type DetectAnomaliesInput struct {
    Folder  string
    Models  []string // at least 2 filter names
    Species []string // optional scope; empty = all species
}

type DetectAnomaliesOutput struct {
    Folder              string    `json:"folder"`
    Models              []string  `json:"models"`
    FilesExamined       int       `json:"files_examined"`
    FilesWithAllModels  int       `json:"files_with_all_models"`
    AnomaliesTotal      int       `json:"anomalies_total"`
    LabelMismatches     int       `json:"label_mismatches"`
    CertaintyMismatches int       `json:"certainty_mismatches"`
    Anomalies           []Anomaly `json:"anomalies,omitempty"`
    Error               string    `json:"error,omitempty"`
}

type Anomaly struct {
    File     string           `json:"file"`
    Type     string           `json:"type"` // "label_mismatch" | "certainty_mismatch"
    Segments []AnomalySegment `json:"segments"`
}

type AnomalySegment struct {
    Model     string  `json:"model"`
    Start     float64 `json:"start"`
    End       float64 `json:"end"`
    Species   string  `json:"species"`
    CallType  string  `json:"calltype,omitempty"`
    Certainty int     `json:"certainty"`
}

// validateAnomalyInput validates the input parameters for DetectAnomalies.
func validateAnomalyInput(input DetectAnomaliesInput) error {
    if len(input.Models) < 2 {
        return fmt.Errorf("at least 2 --model values required")
    }
    for i, a := range input.Models {
        for j, b := range input.Models {
            if i != j && a == b {
                return fmt.Errorf("duplicate --model values are not allowed")
            }
        }
    }
    info, err := os.Stat(input.Folder)
    if err != nil {
        return fmt.Errorf("folder not found: %s", input.Folder)
    }
    if !info.IsDir() {
        return fmt.Errorf("not a directory: %s", input.Folder)
    }
    return nil
}

// DetectAnomalies compares corresponding segments across multiple ML model filters
// within each .data file. Segments are matched by time overlap (same logic as propagate).
// Lonely segments (no overlap in one or more models) are silently skipped.
// Anomalies are flagged when overlapping segments disagree on species+calltype,
// or when labels match but certainty values differ.
func DetectAnomalies(input DetectAnomaliesInput) (DetectAnomaliesOutput, error) {
    folder := filepath.Clean(input.Folder)
    output := DetectAnomaliesOutput{
        Folder: folder,
        Models: input.Models,
    }
    if err := validateAnomalyInput(input); err != nil {
        output.Error = err.Error()
        return output, err
    }
    files, err := utils.FindDataFiles(folder)
    if err != nil {
        output.Error = fmt.Sprintf("list .data files: %v", err)
        return output, fmt.Errorf("%s", output.Error)
    }
    scopeSet := make(map[string]bool, len(input.Species))
    for _, s := range input.Species {
        scopeSet[s] = true
    }
    for _, path := range files {
        df, err := utils.ParseDataFile(path)
        if err != nil {
            continue
        }
        output.FilesExamined++
        anomalies := detectAnomaliesInFile(df, path, input.Models, scopeSet)
        if anomalies == nil {
            // file didn't have all models present
            continue
        }
        output.FilesWithAllModels++
        for _, a := range anomalies {
            if a.Type == "label_mismatch" {
                output.LabelMismatches++
            } else {
                output.CertaintyMismatches++
            }
        }
        output.Anomalies = append(output.Anomalies, anomalies...)
    }
    output.AnomaliesTotal = len(output.Anomalies)
    return output, nil
}

// labeledSeg pairs a segment with the specific label matching the model filter.
type labeledSeg struct {
    seg   *utils.Segment
    label *utils.Label
}

// detectAnomaliesInFile returns nil if the file doesn't contain all required models.
func detectAnomaliesInFile(df *utils.DataFile, path string, models []string, scope map[string]bool) []Anomaly {
    modelSegs := collectModelSegments(df, models)
    // Skip file if any model is entirely absent.
    for _, model := range models {
        if len(modelSegs[model]) == 0 {
            return nil
        }
    }
    // Non-nil so callers can tell "all models present, no anomalies" apart
    // from "model missing" (nil), matching the doc comment above.
    anomalies := []Anomaly{}
    for _, anchor := range modelSegs[models[0]] {
        if !inScope(anchor, scope) {
            continue
        }
        matches := findOverlappingMatches(anchor, models, modelSegs)
        if matches == nil {
            continue
        }
        group := buildComparisonGroup(anchor, models, matches)
        if a := checkGroupAnomaly(group, path, models); a != nil {
            anomalies = append(anomalies, *a)
        }
    }
    return anomalies
}

// collectModelSegments groups labeled segments by model filter name.
func collectModelSegments(df *utils.DataFile, models []string) map[string][]labeledSeg {
    modelSegs := make(map[string][]labeledSeg, len(models))
    for _, seg := range df.Segments {
        for _, lbl := range seg.Labels {
            for _, model := range models {
                if lbl.Filter == model {
                    modelSegs[model] = append(modelSegs[model], labeledSeg{seg: seg, label: lbl})
                    break
                }
            }
        }
    }
    return modelSegs
}

// inScope returns true if the anchor's label is within the species scope filter.
func inScope(anchor labeledSeg, scope map[string]bool) bool {
    if len(scope) == 0 {
        return true
    }
    key := anchor.label.Species
    if anchor.label.CallType != "" {
        key += "+" + anchor.label.CallType
    }
    return scope[key] || scope[anchor.label.Species]
}

// findOverlappingMatches returns matches[model] = overlapping segments from that model,
// or nil if any model has no overlap (lonely anchor).
func findOverlappingMatches(anchor labeledSeg, models []string, modelSegs map[string][]labeledSeg) map[string][]labeledSeg {
    matches := make(map[string][]labeledSeg, len(models)-1)
    for _, model := range models[1:] {
        for _, candidate := range modelSegs[model] {
            if overlaps(anchor.seg, candidate.seg) {
                matches[model] = append(matches[model], candidate)
            }
        }
        if len(matches[model]) == 0 {
            return nil
        }
    }
    return matches
}

// buildComparisonGroup assembles anchor + first match per other model.
func buildComparisonGroup(anchor labeledSeg, models []string, matches map[string][]labeledSeg) []labeledSeg {
    group := []labeledSeg{anchor}
    for _, model := range models[1:] {
        group = append(group, matches[model][0])
    }
    return group
}

// checkGroupAnomaly checks a comparison group for label or certainty mismatches.
func checkGroupAnomaly(group []labeledSeg, path string, models []string) *Anomaly {
    refSpecies := group[0].label.Species
    refCallType := group[0].label.CallType
    for _, ls := range group[1:] {
        if ls.label.Species != refSpecies || ls.label.CallType != refCallType {
            a := Anomaly{File: path, Type: "label_mismatch", Segments: buildAnomalySegs(group, models)}
            return &a
        }
    }
    refCertainty := group[0].label.Certainty
    for _, ls := range group[1:] {
        if ls.label.Certainty != refCertainty {
            a := Anomaly{File: path, Type: "certainty_mismatch", Segments: buildAnomalySegs(group, models)}
            return &a
        }
    }
    return nil
}

func buildAnomalySegs(group []labeledSeg, models []string) []AnomalySegment {
    segs := make([]AnomalySegment, len(group))
    for i, ls := range group {
        segs[i] = AnomalySegment{
            Model:     models[i],
            Start:     ls.seg.StartTime,
            End:       ls.seg.EndTime,
            Species:   ls.label.Species,
            CallType:  ls.label.CallType,
            Certainty: ls.label.Certainty,
        }
    }
    return segs
}

// overlaps returns true if two segments share any time overlap.
func overlaps(a, b *utils.Segment) bool {
    return a.StartTime < b.EndTime && b.StartTime < a.EndTime
}
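// A small illustration of the overlap rule above (sketch only, using the
// Segment fields already exercised in the tests): intervals that merely touch
// (end == start) do not match, so a 0-10s detection pairs with a 9-12s one
// but not with a 10-20s one.
//
//	a := &utils.Segment{StartTime: 0, EndTime: 10}
//	b := &utils.Segment{StartTime: 10, EndTime: 20}
//	c := &utils.Segment{StartTime: 9, EndTime: 12}
//	fmt.Println(overlaps(a, b)) // false — touching endpoints only
//	fmt.Println(overlaps(a, c)) // true — 1s of shared time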
package tools

import (
    "encoding/csv"
    "os"
    "path/filepath"
    "strings"
    "testing"

    "skraak/utils"
)

// --- test helpers (test file only) ---

func writeDataFile(t *testing.T, dir, name string, df *utils.DataFile) {
    t.Helper()
    if err := df.Write(filepath.Join(dir, name)); err != nil {
        t.Fatalf("write .data file %s: %v", name, err)
    }
}

func writeMapping(t *testing.T, dir, json string) {
    t.Helper()
    if err := os.WriteFile(filepath.Join(dir, "mapping.json"), []byte(json), 0644); err != nil {
        t.Fatalf("write mapping.json: %v", err)
    }
}

// parseCSV reads the output CSV, returning header and rows.
func parseCSV(t *testing.T, path string) ([]string, [][]string) {
    t.Helper()
    f, err := os.Open(path)
    if err != nil {
        t.Fatalf("open CSV %s: %v", path, err)
    }
    defer f.Close()
    r := csv.NewReader(f)
    header, err := r.Read()
    if err != nil {
        t.Fatalf("read header: %v", err)
    }
    rows, err := r.ReadAll()
    if err != nil {
        t.Fatalf("read rows: %v", err)
    }
    return header, rows
}

// clipLabels calls CallsClipLabels with standard test parameters.
func clipLabels(t *testing.T, dir string, extra ...func(*CallsClipLabelsInput)) CallsClipLabelsOutput {
    t.Helper()
    input := CallsClipLabelsInput{
        Folder:          dir,
        MappingPath:     filepath.Join(dir, "mapping.json"),
        OutputPath:      filepath.Join(dir, "clip_labels.csv"),
        ClipDuration:    5,
        ClipOverlap:     0,
        MinLabelOverlap: 0.25,
        FinalClip:       "full",
    }
    for _, fn := range extra {
        fn(&input)
    }
    out, err := CallsClipLabels(input)
    if err != nil {
        t.Fatalf("CallsClipLabels: %v", err)
    }
    return out
}

// --- tests ---

func TestClipLabels_RealClassTrue(t *testing.T) {
    dir := t.TempDir()
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
        Meta: &utils.DataMeta{Duration: 20},
        Segments: []*utils.Segment{
            {
                StartTime: 3, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
            },
        },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    out := clipLabels(t, dir)
    header, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
    // Header: file, start_time, end_time, Kiwi
    if len(header) != 4 || header[3] != "Kiwi" {
        t.Fatalf("header = %v, want [..., Kiwi]", header)
    }
    // Clip 0-5 overlaps segment 3-8 by 2s ≥ 0.25 → Kiwi=True
    // Clip 5-10 overlaps segment 3-8 by 3s ≥ 0.25 → Kiwi=True
    // Clip 10-15, 15-20 → Kiwi=False
    kiwiCol := 3
    for i, row := range rows {
        switch row[1] {
        case "0.0", "5.0":
            if row[kiwiCol] != "True" {
                t.Errorf("row %d (start=%s): Kiwi=%s, want True", i, row[1], row[kiwiCol])
            }
        case "10.0", "15.0":
            if row[kiwiCol] != "False" {
                t.Errorf("row %d (start=%s): Kiwi=%s, want False", i, row[1], row[kiwiCol])
            }
        }
    }
    if out.PerClassTrueCount["Kiwi"] != 2 {
        t.Errorf("PerClassTrueCount[Kiwi] = %d, want 2", out.PerClassTrueCount["Kiwi"])
    }
}

func TestClipLabels_GapClipsAllFalse(t *testing.T) {
    dir := t.TempDir()
    // 15s file, Kiwi segment 0-5 only → clips 5-10 and 10-15 are gaps
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
        Meta: &utils.DataMeta{Duration: 15},
        Segments: []*utils.Segment{
            {
                StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
            },
        },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    out := clipLabels(t, dir)
    if out.ClipsAllFalseGap != 2 {
        t.Errorf("ClipsAllFalseGap = %d, want 2", out.ClipsAllFalseGap)
    }
    if out.PerClassTrueCount["Kiwi"] != 1 {
        t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
    }
    if out.RowsWritten != 3 {
        t.Errorf("RowsWritten = %d, want 3", out.RowsWritten)
    }
}

func TestClipLabels_NegativeOverridesPositive(t *testing.T) {
    dir := t.TempDir()
    // Kiwi segment 0-8, Not segment 0-4 → clip 0-5 overlaps both → __NEGATIVE__ wins
    // Clip 5-10 overlaps only Kiwi (3s) → True
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
        Meta: &utils.DataMeta{Duration: 10},
        Segments: []*utils.Segment{
            {
                StartTime: 0, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
            },
            {
                StartTime: 0, EndTime: 4, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Not", Certainty: 100, Filter: "f1"}},
            },
        },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
    out := clipLabels(t, dir)
    if out.ClipsNegative != 1 {
        t.Errorf("ClipsNegative = %d, want 1", out.ClipsNegative)
    }
    _, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
    // Clip 0-5: negative hit → all-False (Not overlaps 0-4 by 4s)
    if rows[0][3] != "False" {
        t.Errorf("clip 0-5 Kiwi = %s, want False (overridden by __NEGATIVE__)", rows[0][3])
    }
    // Clip 5-10: only Kiwi overlaps (3s) → True
    if rows[1][3] != "True" {
        t.Errorf("clip 5-10 Kiwi = %s, want True", rows[1][3])
    }
}

func TestClipLabels_IgnoreExcludesClip(t *testing.T) {
    dir := t.TempDir()
    // Don't Know segment 0-5, Kiwi segment 6-10
    // Clip 0-5 overlaps __IGNORE__ → excluded
    // Clip 5-10 overlaps Kiwi → emitted with True
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
        Meta: &utils.DataMeta{Duration: 15},
        Segments: []*utils.Segment{
            {
                StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Don't Know", Certainty: 0, Filter: "f1"}},
            },
            {
                StartTime: 6, EndTime: 10, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
            },
        },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Don't Know":{"species":"__IGNORE__"}}`)
    out := clipLabels(t, dir)
    if out.ClipsIgnored != 1 {
        t.Errorf("ClipsIgnored = %d, want 1", out.ClipsIgnored)
    }
    if out.SegmentsIgnored != 1 {
        t.Errorf("SegmentsIgnored = %d, want 1", out.SegmentsIgnored)
    }
    // Only 2 rows: clip 5-10 (Kiwi=True) and clip 10-15 (gap)
    if out.RowsWritten != 2 {
        t.Errorf("RowsWritten = %d, want 2", out.RowsWritten)
    }
}

func TestClipLabels_FilterRestrictsLabels(t *testing.T) {
    dir := t.TempDir()
    // Same time range, two filters. Only "wanted" should contribute.
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
        Meta: &utils.DataMeta{Duration: 10},
        Segments: []*utils.Segment{
            {
                StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{
                    {Species: "Kiwi", Certainty: 100, Filter: "wanted"},
                    {Species: "Not", Certainty: 100, Filter: "unwanted"},
                },
            },
        },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
    out := clipLabels(t, dir, func(in *CallsClipLabelsInput) { in.Filter = "wanted" })
    // Only Kiwi from "wanted" filter → clip 0-5 should be Kiwi=True
    // Not from "unwanted" filter should be ignored → no __NEGATIVE__ override
    if out.ClipsNegative != 0 {
        t.Errorf("ClipsNegative = %d, want 0 (Not filter excluded)", out.ClipsNegative)
    }
    if out.PerClassTrueCount["Kiwi"] != 1 {
        t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
    }
}

func TestClipLabels_MappingCoverageError(t *testing.T) {
    dir := t.TempDir()
    writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
        Meta: &utils.DataMeta{Duration: 10},
        Segments: []*utils.Segment{
            {
                StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Mystery", Certainty: 100, Filter: "f1"}},
            },
        },
    })
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    input := CallsClipLabelsInput{
        Folder:          dir,
        MappingPath:     filepath.Join(dir, "mapping.json"),
        OutputPath:      filepath.Join(dir, "clip_labels.csv"),
        ClipDuration:    5,
        ClipOverlap:     0,
        MinLabelOverlap: 0.25,
        FinalClip:       "full",
    }
    _, err := CallsClipLabels(input)
    if err == nil {
        t.Fatal("expected error for missing species in mapping")
    }
    if !strings.Contains(err.Error(), "Mystery") {
        t.Errorf("error should mention missing species, got: %v", err)
    }
}

func TestClipLabels_AppendMode(t *testing.T) {
    dir := t.TempDir()
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    // First file
    writeDataFile(t, dir, "a.wav.data", &utils.DataFile{
        Meta: &utils.DataMeta{Duration: 5},
        Segments: []*utils.Segment{
            {
                StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
            },
        },
    })
    out1 := clipLabels(t, dir)
    if out1.RowsWritten != 1 {
        t.Fatalf("first run: RowsWritten = %d, want 1", out1.RowsWritten)
    }
    // Second run appends to the same output file from the same input folder,
    // so every row is a repeat — it should fail on duplicate detection.
    _, err := CallsClipLabels(CallsClipLabelsInput{
        Folder:          dir,
        MappingPath:     filepath.Join(dir, "mapping.json"),
        OutputPath:      filepath.Join(dir, "clip_labels.csv"),
        ClipDuration:    5,
        ClipOverlap:     0,
        MinLabelOverlap: 0.25,
        FinalClip:       "full",
    })
    if err == nil {
        t.Fatal("expected duplicate error on second run with same folder")
    }
    if !strings.Contains(err.Error(), "duplicate") {
        t.Errorf("error should mention duplicate, got: %v", err)
    }
}

func TestClipLabels_MultipleFiles(t *testing.T) {
    dir := t.TempDir()
    writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
    writeDataFile(t, dir, "a.wav.data", &utils.DataFile{
        Meta: &utils.DataMeta{Duration: 10},
        Segments: []*utils.Segment{
            {
                StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
            },
        },
    })
    writeDataFile(t, dir, "b.wav.data", &utils.DataFile{
        Meta: &utils.DataMeta{Duration: 5},
        Segments: []*utils.Segment{
            {
                StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
                Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
            },
        },
    })
    out := clipLabels(t, dir)
    if out.DataFilesParsed != 2 {
        t.Errorf("DataFilesParsed = %d, want 2", out.DataFilesParsed)
    }
    // a: 2 clips (0-5, 5-10), b: 1 clip (0-5) = 3 total
    if out.RowsWritten != 3 {
        t.Errorf("RowsWritten = %d, want 3", out.RowsWritten)
    }
    _, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
    files := map[string]int{}
    for _, r := range rows {
        files[r[0]]++
    }
    if len(files) != 2 {
        t.Errorf("expected 2 distinct files in CSV, got %d", len(files))
    }
}
package tools

import (
    "encoding/csv"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "slices"
    "sort"
    "strconv"
    "strings"

    "skraak/utils"
)

// CallsClipLabelsInput configures the clip-labels exporter.
type CallsClipLabelsInput struct {
    Folder          string  `json:"folder"`
    MappingPath     string  `json:"mapping"`
    Filter          string  `json:"filter,omitempty"`
    OutputPath      string  `json:"output"`
    ClipDuration    float64 `json:"clip_duration"`
    ClipOverlap     float64 `json:"clip_overlap"`
    MinLabelOverlap float64 `json:"min_label_overlap"`
    FinalClip       string  `json:"final_clip"`
}

// CallsClipLabelsOutput summarises a run.
type CallsClipLabelsOutput struct {
    Folder            string         `json:"folder"`
    OutputPath        string         `json:"output"`
    Filter            string         `json:"filter,omitempty"`
    Classes           []string       `json:"classes"`
    DataFilesParsed   int            `json:"data_files_parsed"`
    ClipsNegative     int            `json:"clips_negative"`      // emitted, all-False because of __NEGATIVE__
    ClipsIgnored      int            `json:"clips_ignored"`       // excluded from output because of __IGNORE__ overlap
    SegmentsIgnored   int            `json:"segments_ignored"`    // segments whose species maps to __IGNORE__
    ClipsAllFalseGap  int            `json:"clips_all_false_gap"` // emitted, all-False because no overlap
    PerClassTrueCount map[string]int `json:"per_class_true_count"`
    AppendedToFile    bool           `json:"appended_to_file"`
    ExistingRowsFound int            `json:"existing_rows_found"`
    RowsWritten       int            `json:"rows_written"`
}

// resolvedSeg is a segment that has been classified by the mapping and is
// ready for overlap-checking against clip windows.
type resolvedSeg struct {
    start, end float64
    kind       utils.MappingKind
    classIdx   int // valid only when kind == utils.MappingReal
}

// clipDisposition describes the outcome for a single clip window.
type clipDisposition int

const (
    dispoLabelled clipDisposition = iota // at least one class column is True
    dispoNegative                        // __NEGATIVE__ hit, all class columns False
    dispoGap                             // no segment overlaps, all class columns False
    dispoIgnored                         // __IGNORE__ hit, clip excluded from output
)

// clipLabelsRow is one row of the output CSV.
type clipLabelsRow struct {
    file  string
    start float64
    end   float64
    flags []bool
}

// rowKey is used for duplicate detection.
type rowKey struct {
    file  string
    start string
    end   string
}

// parsedClipFile holds a parsed .data file for clip-labels processing.
type parsedClipFile struct {
    path string
    df   *utils.DataFile
}

// validateClipLabelsInput validates the input parameters and returns the parsed finalClipMode.
func validateClipLabelsInput(input CallsClipLabelsInput) (utils.FinalClipMode, error) {
    finalClipMode, err := utils.ParseFinalClipMode(input.FinalClip)
    if err != nil {
        return 0, err
    }
    if input.ClipDuration <= 0 {
        return 0, fmt.Errorf("--clip-duration must be > 0, got %v", input.ClipDuration)
    }
    if input.ClipOverlap < 0 || input.ClipOverlap >= input.ClipDuration {
        return 0, fmt.Errorf("--clip-overlap must be in [0, clip-duration), got %v", input.ClipOverlap)
    }
    if input.MinLabelOverlap <= 0 {
        return 0, fmt.Errorf("--min-label-overlap must be > 0, got %v", input.MinLabelOverlap)
    }
    return finalClipMode, nil
}

// parseClipLabelsDataFiles finds and parses .data files, collecting species seen.
func parseClipLabelsDataFiles(folder, filter string, mapping utils.MappingFile) ([]parsedClipFile, error) {
    dataPaths, err := utils.FindDataFiles(folder)
    if err != nil {
        return nil, fmt.Errorf("scan folder %s: %w", folder, err)
    }
    if len(dataPaths) == 0 {
        return nil, fmt.Errorf("no .data files found in %s", folder)
    }
    parsed := make([]parsedClipFile, 0, len(dataPaths))
    speciesSeen := map[string]bool{}
    for _, p := range dataPaths {
        df, err := utils.ParseDataFile(p)
        if err != nil {
            return nil, fmt.Errorf("parse %s: %w", p, err)
        }
        if df.Meta == nil || df.Meta.Duration <= 0 {
            return nil, fmt.Errorf("missing or non-positive Duration in %s (cannot generate clips)", p)
        }
        for _, seg := range df.Segments {
            for _, lbl := range seg.Labels {
                if filter != "" && lbl.Filter != filter {
                    continue
                }
                speciesSeen[lbl.Species] = true
            }
        }
        parsed = append(parsed, parsedClipFile{path: p, df: df})
    }
    if missing := mapping.ValidateCoversSpecies(speciesSeen); len(missing) > 0 {
        return nil, fmt.Errorf("mapping.json is missing entries for species: %s\n(run /data-mapping to regenerate)", strings.Join(missing, ", "))
    }
    return parsed, nil
}

// dedupClipLabelsRows checks for duplicate rows within new rows and against existing CSV rows.
func dedupClipLabelsRows(rows []clipLabelsRow, existing map[rowKey]bool) error {
    dedup := make(map[rowKey]bool, len(existing)+len(rows))
    for k := range existing {
        dedup[k] = true
    }
    for _, r := range rows {
        k := rowKey{file: r.file, start: formatTime(r.start), end: formatTime(r.end)}
        if dedup[k] {
            return fmt.Errorf("duplicate clip detected: file=%s start=%s end=%s", k.file, k.start, k.end)
        }
        dedup[k] = true
    }
    return nil
}

// CallsClipLabels reads .data files from a single folder and writes a CSV in
// OpenSoundScape's clip_labels format: one row per clip per file, with one
// True/False column per class in the mapping.
//
// Mirrors BoxedAnnotations.clip_labels(): every clip window is emitted; a
// column is True when any annotation of that class overlaps the window by
// ≥ min_label_overlap seconds. Sentinel mappings (__NEGATIVE__, __IGNORE__)
// get no column and contribute no labels.
func CallsClipLabels(input CallsClipLabelsInput) (CallsClipLabelsOutput, error) {
    out := CallsClipLabelsOutput{
        Folder:            input.Folder,
        OutputPath:        input.OutputPath,
        PerClassTrueCount: map[string]int{},
    }
    finalClipMode, err := validateClipLabelsInput(input)
    if err != nil {
        return out, err
    }
    mapping, err := utils.LoadMappingFile(input.MappingPath)
    if err != nil {
        return out, fmt.Errorf("load mapping %s: %w", input.MappingPath, err)
    }
    classes := mapping.Classes()
    if len(classes) == 0 {
        return out, fmt.Errorf("mapping.json has no real (non-sentinel) classes")
    }
    out.Classes = classes
    out.Filter = input.Filter
    classIdx := map[string]int{}
    for i, c := range classes {
        classIdx[c] = i
    }
    parsed, err := parseClipLabelsDataFiles(input.Folder, input.Filter, mapping)
    if err != nil {
        return out, err
    }
    out.DataFilesParsed = len(parsed)
    expectedHeader := append([]string{"file", "start_time", "end_time"}, classes...)
    existing, appendMode, err := loadExistingRows(input.OutputPath, expectedHeader)
    if err != nil {
        return out, err
    }
    out.AppendedToFile = appendMode
    out.ExistingRowsFound = len(existing)
    cwd, err := os.Getwd()
    if err != nil {
        return out, fmt.Errorf("getwd: %w", err)
    }
    folderAbs, err := filepath.Abs(input.Folder)
    if err != nil {
        return out, fmt.Errorf("abs %s: %w", input.Folder, err)
    }
    rows := make([]clipLabelsRow, 0, 1024)
    for _, pf := range parsed {
        fileRows, err := processClipLabelsFile(pf.path, pf.df, mapping, classIdx, classes, input, finalClipMode, cwd, folderAbs, &out)
        if err != nil {
            return out, err
        }
        rows = append(rows, fileRows...)
    }
    if err := dedupClipLabelsRows(rows, existing); err != nil {
        return out, err
    }
    if err := writeRows(input.OutputPath, expectedHeader, rows, appendMode); err != nil {
        return out, err
    }
    out.RowsWritten = len(rows)
    sort.Strings(out.Classes)
    return out, nil
}

// processClipLabelsFile generates clip-labels rows for a single .data file.
func processClipLabelsFile(
    path string,
    df *utils.DataFile,
    mapping utils.MappingFile,
    classIdx map[string]int,
    classes []string,
    input CallsClipLabelsInput,
    finalClipMode utils.FinalClipMode,
    cwd, folderAbs string,
    out *CallsClipLabelsOutput,
) ([]clipLabelsRow, error) {
    windows, err := utils.GenerateClipTimes(
        df.Meta.Duration,
        input.ClipDuration,
        input.ClipOverlap,
        finalClipMode,
        10,
    )
    if err != nil {
        return nil, fmt.Errorf("generate clip windows for %s: %w", path, err)
    }
    if len(windows) == 0 {
        return nil, nil
    }
    segs := resolveSegments(df.Segments, input.Filter, input.MinLabelOverlap, mapping, classIdx, out)
    rel, err := computeWavRelPath(path, cwd, folderAbs)
    if err != nil {
        return nil, err
    }
    return labelClipWindows(windows, segs, rel, classes, input.MinLabelOverlap, out), nil
}

// resolveSegments maps segments to their classification and filters out mismatches.
func resolveSegments(
    segments []*utils.Segment,
    filter string,
    minLabelOverlap float64,
    mapping utils.MappingFile,
    classIdx map[string]int,
    out *CallsClipLabelsOutput,
) []resolvedSeg {
    segs := make([]resolvedSeg, 0, len(segments))
    for _, seg := range segments {
        if seg.EndTime-seg.StartTime < minLabelOverlap {
            continue
        }
        for _, lbl := range seg.Labels {
            if filter != "" && lbl.Filter != filter {
                continue
            }
            canon, kind, ok := mapping.Classify(lbl.Species)
            if !ok {
                continue
            }
            switch kind {
            case utils.MappingIgn:
                out.SegmentsIgnored++
                segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind})
            case utils.MappingNeg:
                segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind})
            case utils.MappingReal:
                idx, present := classIdx[canon]
                if !present {
                    continue
                }
                segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind, classIdx: idx})
            }
        }
    }
    return segs
}

// computeWavRelPath computes the relative path from cwd to the WAV file corresponding to a .data file.
func computeWavRelPath(dataPath, cwd, folderAbs string) (string, error) {
    wavName := strings.TrimSuffix(filepath.Base(dataPath), ".data")
    wavAbs := filepath.Join(folderAbs, wavName)
    rel, err := filepath.Rel(cwd, wavAbs)
    if err != nil {
        rel = wavAbs
    }
    // Ensure relative paths start with ./ to match OPSO / pandas convention.
    if rel != "" && !filepath.IsAbs(rel) && !strings.HasPrefix(rel, "."+string(filepath.Separator)) {
        rel = "." + string(filepath.Separator) + rel
    }
    return rel, nil
}

// labelClipWindows classifies each clip window and builds the output rows.
func labelClipWindows(windows []utils.ClipWindow, segs []resolvedSeg, rel string, classes []string, minLabelOverlap float64, out *CallsClipLabelsOutput) []clipLabelsRow {
    var rows []clipLabelsRow
    for _, w := range windows {
        dispo, classHits := classifyClip(w, segs, minLabelOverlap, len(classes))
        if dispo == dispoIgnored {
            out.ClipsIgnored++
            continue
        }
        row := clipLabelsRow{
            file:  rel,
            start: w.Start,
            end:   w.End,
            flags: make([]bool, len(classes)),
        }
        switch dispo {
        case dispoNegative:
            out.ClipsNegative++
        case dispoGap:
            out.ClipsAllFalseGap++
        case dispoLabelled:
            for i, hit := range classHits {
                if hit {
                    row.flags[i] = true
                    out.PerClassTrueCount[classes[i]]++
                }
            }
        }
        rows = append(rows, row)
    }
    return rows
}

// classifyClip determines the disposition of a single clip window against
// the resolved segments. Priority: __IGNORE__ > __NEGATIVE__ > class labels.
func classifyClip(w utils.ClipWindow, segs []resolvedSeg, minLabelOverlap float64, nClasses int) (clipDisposition, []bool) {
    ignoreHit := false
    negativeHit := false
    classHits := make([]bool, nClasses)
    for _, s := range segs {
        if overlapSeconds(s.start, s.end, w.Start, w.End) < minLabelOverlap {
            continue
        }
        switch s.kind {
        case utils.MappingIgn:
            ignoreHit = true
        case utils.MappingNeg:
            negativeHit = true
        case utils.MappingReal:
            classHits[s.classIdx] = true
        }
    }
    if ignoreHit {
        return dispoIgnored, nil
    }
    if negativeHit {
        return dispoNegative, classHits
    }
    for _, hit := range classHits {
        if hit {
            return dispoLabelled, classHits
        }
    }
    return dispoGap, classHits
}

// loadExistingRows reads an existing output CSV and returns its row keys
// (for deduplication) and whether we're in append mode.
func loadExistingRows(outputPath string, expectedHeader []string) (map[rowKey]bool, bool, error) {
    fi, err := os.Stat(outputPath)
    if err != nil {
        if os.IsNotExist(err) {
            return nil, false, nil
        }
        return nil, false, fmt.Errorf("stat %s: %w", outputPath, err)
    }
    if fi.Size() == 0 {
        return nil, false, nil
    }
    f, err := os.Open(outputPath)
    if err != nil {
        return nil, false, fmt.Errorf("open existing %s: %w", outputPath, err)
    }
    defer func() { _ = f.Close() }()
    r := csv.NewReader(f)
    r.FieldsPerRecord = -1
    header, err := r.Read()
    if err != nil {
        return nil, false, fmt.Errorf("read header of existing %s: %w", outputPath, err)
    }
    if !slices.Equal(header, expectedHeader) {
        return nil, false, fmt.Errorf("column-set mismatch in existing %s\n  existing: %s\n  new:      %s",
            outputPath, strings.Join(header, ","), strings.Join(expectedHeader, ","))
    }
    existing := map[rowKey]bool{}
    for {
        rec, err := r.Read()
        if err == io.EOF {
            break
        }
        if err != nil {
            return nil, false, fmt.Errorf("read row of existing %s: %w", outputPath, err)
        }
        if len(rec) < 3 {
            return nil, false, fmt.Errorf("malformed row in existing %s: %v", outputPath, rec)
        }
        existing[rowKey{file: rec[0], start: rec[1], end: rec[2]}] = true
    }
    return existing, true, nil
}

// overlapSeconds returns the duration of overlap between two half-open intervals.
func overlapSeconds(aStart, aEnd, bStart, bEnd float64) float64 {
    lo := max(aStart, bStart)
    hi := min(aEnd, bEnd)
    if hi <= lo {
        return 0
    }
    return hi - lo
}

// formatTime renders a float to match pandas' default float repr in to_csv:
// always at least one decimal place, no trailing zeros beyond what's needed.
// e.g. 5 → "5.0", 5.5 → "5.5", 3.5001250000 → "3.500125".
func formatTime(v float64) string {
    s := strconv.FormatFloat(v, 'f', -1, 64)
    if !strings.ContainsRune(s, '.') {
        s += ".0"
    }
    return s
}

// writeRows writes the clip-labels rows to a CSV file.
func writeRows(path string, header []string, rows []clipLabelsRow, appendMode bool) error {
    var f *os.File
    var err error
    if appendMode {
        f, err = os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0644)
    } else {
        f, err = os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
    }
    if err != nil {
        return fmt.Errorf("open %s for write: %w", path, err)
    }
    defer func() { _ = f.Close() }()
    w := csv.NewWriter(f)
    if !appendMode {
        if err := w.Write(header); err != nil {
            return fmt.Errorf("write header: %w", err)
        }
    }
    if len(rows) == 0 {
        w.Flush()
        return w.Error()
    }
    rec := make([]string, 3+len(rows[0].flags))
    for _, r := range rows {
        rec[0] = r.file
        rec[1] = formatTime(r.start)
        rec[2] = formatTime(r.end)
        for i, b := range r.flags {
            if b {
                rec[3+i] = "True"
            } else {
                rec[3+i] = "False"
            }
        }
        if err := w.Write(rec); err != nil {
            return fmt.Errorf("write row: %w", err)
        }
    }
    w.Flush()
    return w.Error()
}
package tools

import (
    "encoding/binary"
    "math"
    "os"
    "testing"

    "skraak/utils"
)

const benchWAV = "../audio/20211028_211500.WAV"

// ==================== WAV I/O ====================

func BenchmarkReadWAV(b *testing.B) {
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        _, _, err := utils.ReadWAVSamples(benchWAV)
        if err != nil {
            b.Fatal(err)
        }
    }
}

func BenchmarkConvertToFloat64_16bit(b *testing.B) {
    // Simulate 16-bit mono WAV data (same size as test file: 14.32M samples)
    numSamples := 14320000
    data := make([]byte, numSamples*2)
    for i := range numSamples {
        binary.LittleEndian.PutUint16(data[i*2:], uint16(i%65536))
    }
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        _ = convertToFloat64Bench(data, 16, 1)
    }
}

// Duplicate of convertToFloat64 for benchmarking (unexported in utils)
func convertToFloat64Bench(data []byte, bitsPerSample, channels int) []float64 {
    bytesPerSample := bitsPerSample / 8
    blockAlign := bytesPerSample * channels
    numSamples := len(data) / blockAlign
    samples := make([]float64, numSamples)
    for i := range numSamples {
        offset := i * blockAlign
        sample := int16(binary.LittleEndian.Uint16(data[offset : offset+2]))
        samples[i] = float64(sample) / 32768.0
    }
    return samples
}

func BenchmarkWriteWAV(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    b.Logf("segment samples=%d", len(segSamples))
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        f, _ := os.CreateTemp("", "bench_*.wav")
        utils.WriteWAVFile(f.Name(), segSamples, sr)
        f.Close()
        os.Remove(f.Name())
    }
}

// ==================== Resample ====================

func BenchmarkResampleRate_48k(b *testing.B) {
    samples, _, _ := utils.ReadWAVSamples(benchWAV)
    b.Logf("resampling %d samples 48000->16000", len(samples))
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        utils.ResampleRate(samples, 48000, 16000)
    }
}

func BenchmarkResampleRate_250k(b *testing.B) {
    samples, _, _ := utils.ReadWAVSamples(benchWAV)
    b.Logf("resampling %d samples 250000->16000", len(samples))
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        utils.ResampleRate(samples, 250000, 16000)
    }
}

// ==================== Spectrogram pipeline ====================

func BenchmarkExtractSegment(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    b.Logf("full file: %d samples, sr=%d", len(samples), sr)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        seg := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        if len(seg) == 0 {
            b.Fatal("empty segment")
        }
    }
}

func BenchmarkPowerSpectrumFFT_512(b *testing.B) {
    n := 512
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    frameData := make([]float64, n)
    power := make([]float64, n/2+1)
    scratch := make([]complex128, n)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        // Simulate the windowing step (Hann) + FFT
        for j := range n {
            frameData[j] = segSamples[j] * 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(j)/float64(n-1)))
        }
        utils.PowerSpectrumFFT(frameData, power, scratch)
    }
}

func BenchmarkSpectrogram_23s(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    cfg := utils.DefaultSpectrogramConfig(16000)
    b.Logf("segment samples=%d, windowSize=%d, hopSize=%d", len(segSamples), cfg.WindowSize, cfg.HopSize)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        if spect == nil {
            b.Fatal("nil spectrogram")
        }
    }
}

func BenchmarkSpectrogram_60s(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 0, 60)
    cfg := utils.DefaultSpectrogramConfig(16000)
    b.Logf("60s segment samples=%d", len(segSamples))
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        if spect == nil {
            b.Fatal("nil spectrogram")
        }
    }
}

// ==================== Image creation & resize ====================

func BenchmarkCreateGrayscaleImage(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    cfg := utils.DefaultSpectrogramConfig(16000)
    spect := utils.GenerateSpectrogram(segSamples, cfg)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        img := utils.CreateGrayscaleImage(spect)
        if img == nil {
            b.Fatal("nil image")
        }
    }
}

func BenchmarkCreateRGBImage(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    cfg := utils.DefaultSpectrogramConfig(16000)
    spect := utils.GenerateSpectrogram(segSamples, cfg)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        colorData := utils.ApplyL4Colormap(spect)
        img := utils.CreateRGBImage(colorData)
        if img == nil {
            b.Fatal("nil image")
        }
    }
}

func BenchmarkApplyL4Colormap(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    cfg := utils.DefaultSpectrogramConfig(16000)
    spect := utils.GenerateSpectrogram(segSamples, cfg)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        colorData := utils.ApplyL4Colormap(spect)
        if colorData == nil {
            b.Fatal("nil colormap")
        }
    }
}

func BenchmarkResizeGray224(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    cfg := utils.DefaultSpectrogramConfig(16000)
    spect := utils.GenerateSpectrogram(segSamples, cfg)
    img := utils.CreateGrayscaleImage(spect)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        resized := utils.ResizeImage(img, 224, 224)
        if resized == nil {
            b.Fatal("nil resize")
        }
    }
}

func BenchmarkResizeGray448(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    cfg := utils.DefaultSpectrogramConfig(16000)
    spect := utils.GenerateSpectrogram(segSamples, cfg)
    img := utils.CreateGrayscaleImage(spect)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        resized := utils.ResizeImage(img, 448, 448)
        if resized == nil {
            b.Fatal("nil resize")
        }
    }
}

// ==================== PNG write ====================

func BenchmarkWritePNG_224(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    cfg := utils.DefaultSpectrogramConfig(16000)
    spect := utils.GenerateSpectrogram(segSamples, cfg)
    img := utils.CreateGrayscaleImage(spect)
    resized := utils.ResizeImage(img, 224, 224)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        f, _ := os.CreateTemp("", "bench_*.png")
        utils.WritePNG(resized, f)
        f.Close()
        os.Remove(f.Name())
    }
}

// ==================== Full pipeline ====================

func BenchmarkFullPipelineGray224(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        outputSR := sr
        if sr > 16000 {
            segSamples = utils.ResampleRate(segSamples, sr, 16000)
            outputSR = 16000
        }
        cfg := utils.DefaultSpectrogramConfig(outputSR)
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        img := utils.CreateGrayscaleImage(spect)
        resized := utils.ResizeImage(img, 224, 224)
        f, _ := os.CreateTemp("", "bench_*.png")
        utils.WritePNG(resized, f)
        f.Close()
        os.Remove(f.Name())
        utils.WriteWAVFile(f.Name(), segSamples, outputSR)
        os.Remove(f.Name())
        _ = resized
    }
}

func BenchmarkFullPipelineColor448(b *testing.B) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
        outputSR := sr
        if sr > 16000 {
            segSamples = utils.ResampleRate(segSamples, sr, 16000)
            outputSR = 16000
        }
        cfg := utils.DefaultSpectrogramConfig(outputSR)
        spect := utils.GenerateSpectrogram(segSamples, cfg)
        colorData := utils.ApplyL4Colormap(spect)
        img := utils.CreateRGBImage(colorData)
        resized := utils.ResizeImage(img, 448, 448)
        f, _ := os.CreateTemp("", "bench_*.png")
        utils.WritePNG(resized, f)
        f.Close()
        os.Remove(f.Name())
        utils.WriteWAVFile(f.Name(), segSamples, outputSR)
        os.Remove(f.Name())
        _ = resized
    }
}

// ==================== Data dimension report ====================

func TestPipelineDimensions(t *testing.T) {
    samples, sr, _ := utils.ReadWAVSamples(benchWAV)
    segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
    t.Logf("Input: %d samples, sr=%d, segment=%d samples (%.1fs)",
        len(samples), sr, len(segSamples), float64(len(segSamples))/float64(sr))
    cfg := utils.DefaultSpectrogramConfig(16000)
    numFrames := (len(segSamples)-cfg.WindowSize)/cfg.HopSize + 1
    numBins := cfg.WindowSize/2 + 1
    t.Logf("Spectrogram: %d freq bins x %d time frames = %d values",
        numBins, numFrames, numBins*numFrames)
    spect := utils.GenerateSpectrogram(segSamples, cfg)
    t.Logf("Output: %d x %d (freq x time)", len(spect), len(spect[0]))
    img := utils.CreateGrayscaleImage(spect)
    t.Logf("Grayscale image: %dx%d pixels, %d bytes",
        img.Bounds().Dx(), img.Bounds().Dy(), img.Bounds().Dx()*img.Bounds().Dy())
    resized := utils.ResizeImage(img, 224, 224)
    t.Logf("Resized 224: %dx%d", resized.Bounds().Dx(), resized.Bounds().Dy())
    resized448 := utils.ResizeImage(img, 448, 448)
    t.Logf("Resized 448: %dx%d", resized448.Bounds().Dx(), resized448.Bounds().Dy())
}
package tools

import (
    "fmt"
    "image"
    "math"
    "os"
    "path/filepath"
    "runtime"
    "strings"
    "sync"

    "skraak/utils"
)

// CallsClipInput defines the input for the clip tool
type CallsClipInput struct {
    File      string `json:"file"`
    Folder    string `json:"folder"`
    Output    string `json:"output"`
    Prefix    string `json:"prefix"`
    Filter    string `json:"filter"`
    Species   string `json:"species"`
    Certainty int    `json:"certainty"`
    Size      int    `json:"size"`
    Color     bool   `json:"color"`
    Night     bool   `json:"night"`
    Day       bool   `json:"day"`
    Location  string `json:"location,omitempty"`
}

// CallsClipOutput defines the output for the clip tool
type CallsClipOutput struct {
    FilesProcessed  int      `json:"files_processed"`
    SegmentsClipped int      `json:"segments_clipped"`
    NightSkipped    int      `json:"night_skipped,omitempty"`
    DaySkipped      int      `json:"day_skipped,omitempty"`
    OutputFiles     []string `json:"output_files"`
    Errors          []string `json:"errors,omitempty"`
}

// CallsClip processes .data files and generates audio/image clips for matching segments
func CallsClip(input CallsClipInput) (CallsClipOutput, error) {
    var output CallsClipOutput

    // Validate required flags
    if err := validateClipInput(&output, input); err != nil {
        return output, err
    }

    // Parse species+calltype
    speciesName, callType := utils.ParseSpeciesCallType(input.Species)

    // Parse location into lat/lng/timezone
    var lat, lng float64
    var timezone string
    if input.Location != "" {
        var err error
        lat, lng, timezone, err = utils.ParseLocation(input.Location)
        if err != nil {
            output.Errors = append(output.Errors, err.Error())
            return output, err
        }
    }

    // Get list of .data files
    filePaths, err := resolveClipFiles(&output, input)
    if err != nil {
        return output, err
    }

    // Create output folder if it doesn't exist
    if err := os.MkdirAll(input.Output, 0755); err != nil {
        output.Errors = append(output.Errors, fmt.Sprintf("failed to create output folder: %v", err))
        return output, err
    }

    // Clamp image size to valid range
    imgSize := utils.ClampImageSize(input.Size)

    // Process .data files (parallel for larger batches)
    if len(filePaths) <= 2 {
        processFilesSequential(&output, filePaths, input, speciesName, callType, imgSize, lat, lng, timezone)
    } else {
        processFilesParallel(&output, filePaths, input, speciesName, callType, imgSize, lat, lng, timezone)
    }
    return output, nil
}

// validateClipInput validates required flags for clip generation.
func validateClipInput(output *CallsClipOutput, input CallsClipInput) error {
    if input.File == "" && input.Folder == "" {
        output.Errors = append(output.Errors, "either --file or --folder is required")
        return fmt.Errorf("missing required flag: --file or --folder")
    }
    if input.Output == "" {
        output.Errors = append(output.Errors, "--output is required")
        return fmt.Errorf("missing required flag: --output")
    }
    if input.Prefix == "" {
        output.Errors = append(output.Errors, "--prefix is required")
        return fmt.Errorf("missing required flag: --prefix")
    }
    return nil
}

// resolveClipFiles returns the list of .data file paths from input.
func resolveClipFiles(output *CallsClipOutput, input CallsClipInput) ([]string, error) {
    if input.File != "" {
        return []string{input.File}, nil
    }
    filePaths, err := utils.FindDataFiles(input.Folder)
    if err != nil {
        output.Errors = append(output.Errors, fmt.Sprintf("failed to find .data files: %v", err))
        return nil, err
    }
    if len(filePaths) == 0 {
        output.Errors = append(output.Errors, "no .data files found")
        return nil, fmt.Errorf("no .data files found")
    }
    return filePaths, nil
}

// processFilesSequential processes .data files one at a time.
func processFilesSequential(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
    for _, dataPath := range filePaths {
        clips, skipped, errs := processFile(dataPath, input.Output, input.Prefix, input.Filter, speciesName, callType, input.Certainty, imgSize, input.Color, input.Night, input.Day, lat, lng, timezone)
        accumulateFileResult(output, clips, skipped, errs, input.Night)
    }
}

// processFilesParallel processes .data files using worker goroutines.
func processFilesParallel(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
    type fileResult struct {
        clips   []string
        skipped int
        errs    []string
    }
    workers := min(runtime.NumCPU(), 8, len(filePaths))
    jobs := make(chan string, len(filePaths))
    results := make(chan fileResult, len(filePaths))
    var wg sync.WaitGroup
    for range workers {
        wg.Go(func() {
            for dataPath := range jobs {
                clips, skipped, errs := processFile(dataPath, input.Output, input.Prefix, input.Filter, speciesName, callType, input.Certainty, imgSize, input.Color, input.Night, input.Day, lat, lng, timezone)
                results <- fileResult{clips: clips, skipped: skipped, errs: errs}
            }
        })
    }
    for _, dataPath := range filePaths {
        jobs <- dataPath
    }
    close(jobs) // no more work: lets workers drain the queue and exit
    go func() {
        wg.Wait()
        close(results)
    }()
    for r := range results {
        accumulateFileResult(output, r.clips, r.skipped, r.errs, input.Night)
    }
}

// accumulateFileResult merges a single file's results into the output.
func accumulateFileResult(output *CallsClipOutput, clips []string, skipped int, errs []string, night bool) {
    output.SegmentsClipped += len(clips)
    if night {
        output.NightSkipped += skipped
    } else {
        output.DaySkipped += skipped
    }
    output.OutputFiles = append(output.OutputFiles, clips...)
    output.Errors = append(output.Errors, errs...)
    if len(clips) > 0 || len(errs) == 0 {
        output.FilesProcessed++
    }
}

// processFile processes a single .data file and returns generated clips, time-filter-skipped count, and errors
func processFile(dataPath, outputDir, prefix, filter, speciesName, callType string, certainty, imgSize int, color, night, day bool, lat, lng float64, timezone string) ([]string, int, []string) {
    var clips []string
    var errors []string

    // Parse .data file
    dataFile, err := utils.ParseDataFile(dataPath)
    if err != nil {
        errors = append(errors, fmt.Sprintf("%s: failed to parse: %v", dataPath, err))
        return nil, 0, errors
    }

    // Get WAV basename (without path and extensions)
    wavPath := filepath.Clean(strings.TrimSuffix(dataPath, ".data"))
    basename := filepath.Base(wavPath)
    basename = strings.TrimSuffix(basename, filepath.Ext(basename))

    // Filter segments
    matchingSegments := filterSegments(dataFile.Segments, filter, speciesName, callType, certainty)
    if len(matchingSegments) == 0 {
        return nil, 0, nil
    }

    // Day/night filter: check WAV header only (cheaper than reading full audio).
    if night || day {
        skipped, err := checkDayNightFilter(wavPath, night, day, lat, lng, timezone)
        if err != nil || skipped {
            if skipped {
                return nil, 1, nil
            }
            return nil, 0, nil
        }
    }

    // Read WAV samples once
    samples, sampleRate, err := utils.ReadWAVSamples(wavPath)
    if err != nil {
        errors = append(errors, fmt.Sprintf("%s: failed to read WAV: %v", dataPath, err))
        return nil, 0, errors
    }

    // Process matching segments
    clips, errors = processSegments(matchingSegments, dataPath, samples, sampleRate, outputDir, prefix, basename, imgSize, color)
    return clips, 0, errors
}

// filterSegments returns segments matching the given filter criteria.
func filterSegments(segments []*utils.Segment, filter, speciesName, callType string, certainty int) []*utils.Segment {
    var matching []*utils.Segment
    for _, seg := range segments {
        if seg.SegmentMatchesFilters(filter, speciesName, callType, certainty) {
            matching = append(matching, seg)
        }
    }
    return matching
}

// checkDayNightFilter applies day/night filtering. Returns (skipped=true, nil) if the
// recording should be skipped, (false, nil) if it passes, or (false, err) on failure.
func checkDayNightFilter(wavPath string, night, day bool, lat, lng float64, timezone string) (bool, error) {
    result, err := IsNight(IsNightInput{
        FilePath: wavPath,
        Lat:      lat,
        Lng:      lng,
        Timezone: timezone,
    })
    if err != nil {
        fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
        return false, err
    }
    if night && !result.SolarNight {
        fmt.Fprintf(os.Stderr, "skipped (daytime): %s\n", wavPath)
        return true, nil
    }
    if day && !result.DiurnalActive {
        fmt.Fprintf(os.Stderr, "skipped (nighttime): %s\n", wavPath)
        return true, nil
    }
    return false, nil
}

// processSegments generates clips for matching segments, using parallel processing for larger batches.
func processSegments(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
    var clips []string
    var errors []string
    if len(segments) <= 2 {
        for _, seg := range segments {
            clipFiles, err := generateClip(samples, sampleRate, outputDir, prefix, basename, seg.StartTime, seg.EndTime, imgSize, color)
            if err != nil {
                errors = append(errors, fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, seg.StartTime, seg.EndTime, err))
                continue
            }
            clips = append(clips, clipFiles...)
        }
    } else {
        clips, errors = processSegmentsParallel(segments, dataPath, samples, sampleRate, outputDir, prefix, basename, imgSize, color)
    }
    return clips, errors
}

// processSegmentsParallel generates clips for segments using worker goroutines.
func processSegmentsParallel(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
    type segResult struct {
        clips []string
        err   string
    }
    workers := min(runtime.NumCPU(), len(segments))
    jobs := make(chan *utils.Segment, len(segments))
    results := make(chan segResult, len(segments))
    var wg sync.WaitGroup
    for range workers {
        wg.Go(func() {
            for seg := range jobs {
                clipFiles, err := generateClip(samples, sampleRate, outputDir, prefix, basename, seg.StartTime, seg.EndTime, imgSize, color)
                if err != nil {
                    results <- segResult{err: fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, seg.StartTime, seg.EndTime, err)}
                } else {
                    results <- segResult{clips: clipFiles}
                }
            }
        })
    }
    for _, seg := range segments {
        jobs <- seg
    }
    close(jobs)
    go func() {
        wg.Wait()
        close(results)
    }()
    var clips []string
    var errors []string
    for r := range results {
        if r.err != "" {
            errors = append(errors, r.err)
        } else {
            clips = append(clips, r.clips...)
        }
    }
    return clips, errors
}

// generateClip generates PNG and WAV files for a segment
func generateClip(samples []float64, sampleRate int, outputDir, prefix, basename string, startTime, endTime float64, imgSize int, color bool) ([]string, error) {
    var files []string

    // Calculate integer times for filename
    startInt := int(math.Floor(startTime))
    endInt := int(math.Ceil(endTime))

    // Build base filename
    baseName := fmt.Sprintf("%s_%s_%d_%d", prefix, basename, startInt, endInt)
    wavPath := filepath.Join(outputDir, baseName+".wav")

    // Extract segment samples
    segSamples := utils.ExtractSegmentSamples(samples, sampleRate, startTime, endTime)
    if len(segSamples) == 0 {
        return nil, fmt.Errorf("no samples in segment")
    }

    // Determine output sample rate (downsample if > 16kHz)
    outputSampleRate := sampleRate
    if sampleRate > utils.DefaultMaxSampleRate {
        segSamples = utils.ResampleRate(segSamples, sampleRate, utils.DefaultMaxSampleRate)
        outputSampleRate = utils.DefaultMaxSampleRate
    }

    pngPath := filepath.Join(outputDir, baseName+".png")
    spectSampleRate := outputSampleRate
    config := utils.DefaultSpectrogramConfig(spectSampleRate)
    spectrogram := utils.GenerateSpectrogram(segSamples, config)
    if spectrogram == nil {
        return nil, fmt.Errorf("failed to generate spectrogram")
    }

    // Create image (grayscale or color)
    var img image.Image
    if color {
        colorData := utils.ApplyL4Colormap(spectrogram)
        img = utils.CreateRGBImage(colorData)
    } else {
        img = utils.CreateGrayscaleImage(spectrogram)
    }
    if img == nil {
        return nil, fmt.Errorf("failed to create image")
    }
    resized := utils.ResizeImage(img, imgSize, imgSize)

    // Write PNG (O_EXCL fails atomically if file exists)
    pngFile, err := os.OpenFile(pngPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
    if err != nil {
        if os.IsExist(err) {
            return nil, fmt.Errorf("file already exists: %s", pngPath)
        }
        return nil, fmt.Errorf("failed to create PNG: %w", err)
    }
    if err := utils.WritePNG(resized, pngFile); err != nil {
        _ = pngFile.Close()
        return nil, fmt.Errorf("failed to write PNG: %w", err)
    }
    if err := pngFile.Close(); err != nil {
        return nil, fmt.Errorf("failed to close PNG: %w", err)
    }
    files = append(files, pngPath)

    // Write WAV
    if err := utils.WriteWAVFile(wavPath, segSamples, outputSampleRate); err != nil {
        return nil, fmt.Errorf("failed to write WAV: %w", err)
    }
    files = append(files, wavPath)
    return files, nil
}
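// Hypothetical invocation sketch (not part of the tool): clip Kiwi segments
// from a folder into 224px grayscale spectrogram PNGs plus WAV snippets,
// subject to the given certainty threshold. The paths are placeholders; the
// fields are those of CallsClipInput above.
//
//	out, err := CallsClip(CallsClipInput{
//		Folder:    "/recordings/2021-10", // hypothetical
//		Output:    "/clips/kiwi",         // hypothetical
//		Prefix:    "C05",
//		Species:   "Kiwi",
//		Certainty: 100,
//		Size:      224,
//	})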
package toolsimport ("testing""skraak/utils")func NewClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile) *ClassifyState {hasFilter := config.Filter != "" || config.Species != "" || config.Certainty >= 0cached := make([][]*utils.Segment, len(dataFiles))for i, df := range dataFiles {if !hasFilter {cached[i] = df.Segments} else {for _, seg := range df.Segments {if seg.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {cached[i] = append(cached[i], seg)}}}}total := 0for _, segs := range cached {total += len(segs)}return &ClassifyState{Config: config,DataFiles: dataFiles,filteredSegs: cached,totalSegs: total,}}func TestParseKeyBuffer(t *testing.T) {bindings := []KeyBinding{{Key: "k", Species: "Kiwi"},{Key: "d", Species: "Kiwi", CallType: "Duet"},{Key: "n", Species: "Don't Know"},{Key: "p", Species: "Morepork"},}state := NewClassifyState(ClassifyConfig{Bindings: bindings, Certainty: -1}, nil)tests := []struct {key stringwant *BindingResultwantNil bool}{{"k", &BindingResult{Species: "Kiwi"}, false},{"d", &BindingResult{Species: "Kiwi", CallType: "Duet"}, false},{"n", &BindingResult{Species: "Don't Know"}, false},{"p", &BindingResult{Species: "Morepork"}, false},{"x", nil, true}, // unknown key}for _, tt := range tests {got := state.ParseKeyBuffer(tt.key)if tt.wantNil {if got != nil {t.Errorf("ParseKeyBuffer(%q) = %v, want nil", tt.key, got)}} else {if got == nil {t.Errorf("ParseKeyBuffer(%q) = nil, want %+v", tt.key, tt.want)continue}if got.Species != tt.want.Species {t.Errorf("ParseKeyBuffer(%q).Species = %q, want %q", tt.key, got.Species, tt.want.Species)}if got.CallType != tt.want.CallType {t.Errorf("ParseKeyBuffer(%q).CallType = %q, want %q", tt.key, got.CallType, tt.want.CallType)}}}}func TestApplyBinding(t *testing.T) {bindings := []KeyBinding{{Key: "k", Species: "Kiwi"},{Key: "n", Species: "Don't Know"},{Key: "d", Species: "Kiwi", CallType: "Duet"},}df := &utils.DataFile{Meta: &utils.DataMeta{},Segments: []*utils.Segment{{StartTime: 10.0,EndTime: 20.0,Labels: []*utils.Label{{Species: "Unknown", Certainty: 50, Filter: "test-filter", CallType: "OldType"},},},},}state := NewClassifyState(ClassifyConfig{Filter: "test-filter",Reviewer: "David",Bindings: bindings,Certainty: -1,}, []*utils.DataFile{df})// Apply "k" = Kiwi (no calltype, should remove existing calltype)result := &BindingResult{Species: "Kiwi"}state.ApplyBinding(result)// Check label was updatedif len(df.Segments[0].Labels) != 1 {t.Errorf("expected 1 label, got %d", len(df.Segments[0].Labels))}if df.Segments[0].Labels[0].Species != "Kiwi" {t.Errorf("expected Species=Kiwi, got %s", df.Segments[0].Labels[0].Species)}if df.Segments[0].Labels[0].Certainty != 100 {t.Errorf("expected Certainty=100, got %d", df.Segments[0].Labels[0].Certainty)}if df.Segments[0].Labels[0].CallType != "" {t.Errorf("expected CallType='', got %s (should be removed)", df.Segments[0].Labels[0].CallType)}if df.Meta.Reviewer != "David" {t.Errorf("expected Reviewer=David, got %s", df.Meta.Reviewer)}// Apply "d" = Kiwi/Duet (should set calltype)result = &BindingResult{Species: "Kiwi", CallType: "Duet"}state.ApplyBinding(result)if df.Segments[0].Labels[0].CallType != "Duet" {t.Errorf("expected CallType=Duet, got %s", df.Segments[0].Labels[0].CallType)}// Apply "n" = Don't Know (certainty should be 0)result = &BindingResult{Species: "Don't Know"}state.ApplyBinding(result)if df.Segments[0].Labels[0].Species != "Don't Know" {t.Errorf("expected Species=Don't Know, got %s", df.Segments[0].Labels[0].Species)}if 
df.Segments[0].Labels[0].Certainty != 0 {t.Errorf("expected Certainty=0 for Don't Know, got %d", df.Segments[0].Labels[0].Certainty)}}func TestApplyBindingCallTypeRemoval(t *testing.T) {bindings := []KeyBinding{{Key: "k", Species: "Kiwi"}, // no calltype}df := &utils.DataFile{Meta: &utils.DataMeta{},Segments: []*utils.Segment{{StartTime: 10.0,EndTime: 20.0,Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "test-filter", CallType: "Male"},},},},}state := NewClassifyState(ClassifyConfig{Filter: "test-filter",Reviewer: "David",Bindings: bindings,Certainty: -1,}, []*utils.DataFile{df})// Apply "k" = Kiwi (should remove Male calltype)result := &BindingResult{Species: "Kiwi"}state.ApplyBinding(result)if df.Segments[0].Labels[0].CallType != "" {t.Errorf("expected CallType='', got %s (should be removed)", df.Segments[0].Labels[0].CallType)}}func TestConfirmLabelDontKnow(t *testing.T) {df := &utils.DataFile{Meta: &utils.DataMeta{},Segments: []*utils.Segment{{StartTime: 10.0,EndTime: 20.0,Labels: []*utils.Label{{Species: "Don't Know", Certainty: 0, Filter: "test-filter"},},},},}state := NewClassifyState(ClassifyConfig{Filter: "test-filter",Reviewer: "David",Certainty: -1,}, []*utils.DataFile{df})// ConfirmLabel on Don't Know should be a no-opif state.ConfirmLabel() {t.Error("ConfirmLabel() should return false for Don't Know (certainty=0)")}label := df.Segments[0].Labels[0]if label.Species != "Don't Know" {t.Errorf("Species should remain Don't Know, got %s", label.Species)}if label.Certainty != 0 {t.Errorf("Certainty should remain 0, got %d", label.Certainty)}if state.Dirty {t.Error("State should not be dirty after confirming Don't Know")}}
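// TestSegmentNavigationSketch is a hedged addition (not part of the original
// suite): it exercises NextSegment/PrevSegment crossing a file boundary,
// using only the navigation API defined in classify.go.
func TestSegmentNavigationSketch(t *testing.T) {
	df1 := &utils.DataFile{FilePath: "/t/f1.data", Segments: []*utils.Segment{{StartTime: 0, EndTime: 10}, {StartTime: 10, EndTime: 20}}}
	df2 := &utils.DataFile{FilePath: "/t/f2.data", Segments: []*utils.Segment{{StartTime: 0, EndTime: 10}}}
	state := NewClassifyState(ClassifyConfig{Certainty: -1}, []*utils.DataFile{df1, df2})
	if !state.NextSegment() || !state.NextSegment() {
		t.Fatal("expected NextSegment to advance into the second file")
	}
	if state.FileIdx != 1 || state.SegmentIdx != 0 {
		t.Errorf("expected position (1,0), got (%d,%d)", state.FileIdx, state.SegmentIdx)
	}
	if state.NextSegment() {
		t.Error("expected NextSegment to return false at the end")
	}
	if !state.PrevSegment() {
		t.Fatal("expected PrevSegment to step back across the file boundary")
	}
	if state.FileIdx != 0 || state.SegmentIdx != 1 {
		t.Errorf("expected position (0,1), got (%d,%d)", state.FileIdx, state.SegmentIdx)
	}
}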
package tools

import (
	"os"
	"path/filepath"
	"testing"
)

// writeDataFileContent creates a .data file in dir with the given raw content.
func writeDataFileContent(t *testing.T, dir, name, content string) {
	t.Helper()
	if err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0644); err != nil {
		t.Fatal(err)
	}
}

// mustLoadDataFiles is a test helper that calls LoadDataFiles and fatals on error.
func mustLoadDataFiles(t *testing.T, config ClassifyConfig) *ClassifyState {
	t.Helper()
	state, err := LoadDataFiles(config)
	if err != nil {
		t.Fatal(err)
	}
	return state
}

// assertFileSegCounts checks file count and total segment count match expected values.
func assertFileSegCounts(t *testing.T, state *ClassifyState, wantFiles, wantSegs int, label string) {
	t.Helper()
	if len(state.DataFiles) != wantFiles {
		t.Errorf("%s: expected %d files, got %d", label, wantFiles, len(state.DataFiles))
	}
	if state.TotalSegments() != wantSegs {
		t.Errorf("%s: expected %d segments total, got %d", label, wantSegs, state.TotalSegments())
	}
}

const (
	kiwiSeg   = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]]]`
	tomtitSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
)

func TestLoadDataFilesFiltersFilesWithNoMatchingSegments(t *testing.T) {
	tempDir := t.TempDir()
	writeDataFileContent(t, tempDir, "file1.data", kiwiSeg)
	writeDataFileContent(t, tempDir, "file2.data", tomtitSeg)
	writeDataFileContent(t, tempDir, "file3.data", kiwiSeg)
	t.Run("no_filter", func(t *testing.T) {
		state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Certainty: -1})
		assertFileSegCounts(t, state, 3, 3, "No filter")
	})
	t.Run("species_kiwi", func(t *testing.T) {
		state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})
		assertFileSegCounts(t, state, 2, 2, "Species=Kiwi")
	})
	t.Run("species_tomtit", func(t *testing.T) {
		state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Tomtit", Certainty: -1})
		assertFileSegCounts(t, state, 1, 1, "Species=Tomtit")
	})
	t.Run("species_nonexistent", func(t *testing.T) {
		state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "NonExistent", Certainty: -1})
		assertFileSegCounts(t, state, 0, 0, "Species=NonExistent")
	})
}

func TestLoadDataFilesWithMixedSegments(t *testing.T) {
	tempDir := t.TempDir()
	file := `[{"Operator": "test"},[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],[10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]],[20, 30, 100, 1000, [{"species": "Kiwi", "certainty": 95}]]]`
	writeDataFileContent(t, tempDir, "mixed.data", file)
	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})
	if len(state.DataFiles) != 1 {
		t.Errorf("Expected 1 file, got %d", len(state.DataFiles))
	}
	// TotalSegments uses the cached filtered segments, so only the Kiwi ones count
	if state.TotalSegments() != 2 {
		t.Errorf("Species=Kiwi: expected 2 segments, got %d", state.TotalSegments())
	}
	// The DataFile should still have all 3 segments internally
	if len(state.DataFiles[0].Segments) != 3 {
		t.Errorf("DataFile should have 3 segments internally, got %d", len(state.DataFiles[0].Segments))
	}
}

// Test that the original DataFile segments are not modified (immutable filtering)
func TestFilteringDoesNotModifyOriginalSegments(t *testing.T) {
	tempDir := t.TempDir()
	file := `[{"Operator": "test"},[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],[10, 20, 100, 1000, 
[{"species": "Tomtit", "certainty": 80}]]]`writeDataFileContent(t, tempDir, "test.data", file)state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})// Original segments should be untouchedoriginalSegments := state.DataFiles[0].Segmentsif len(originalSegments) != 2 {t.Errorf("Original should have 2 segments, got %d", len(originalSegments))}// Verify all original segments are preservedspecies := []string{}for _, seg := range originalSegments {if len(seg.Labels) > 0 {species = append(species, seg.Labels[0].Species)}}if len(species) != 2 || species[0] != "Kiwi" || species[1] != "Tomtit" {t.Errorf("Original segments should have both species, got %v", species)}}func TestLoadDataFilesCertaintyPruning(t *testing.T) {tempDir := t.TempDir()writeDataFileContent(t, tempDir, "file1.data", `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`)writeDataFileContent(t, tempDir, "file2.data", `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 100}]]]`)state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Certainty: 100})assertFileSegCounts(t, state, 1, 1, "Certainty=100")// CurrentSegment should work (not nil) because file1 was prunedseg := state.CurrentSegment()if seg == nil {t.Error("CurrentSegment should not be nil after pruning")}}return state}}
package toolsimport ("math/rand""testing""skraak/utils")func TestTotalSegmentsRespectsFilters(t *testing.T) {// Create test data files with different species and filtersdf1 := &utils.DataFile{FilePath: "/test/file1.data",Segments: []*utils.Segment{{StartTime: 0,EndTime: 10,Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0"},},},{StartTime: 10,EndTime: 20,Labels: []*utils.Label{{Species: "Tomtit", Filter: "model-1.0"},},},},}df2 := &utils.DataFile{FilePath: "/test/file2.data",Segments: []*utils.Segment{{StartTime: 0,EndTime: 10,Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0"},},},},}// Test 1: No filters - should count all segments (3)state1 := NewClassifyState(ClassifyConfig{Certainty: -1}, []*utils.DataFile{df1, df2})if got := state1.TotalSegments(); got != 3 {t.Errorf("No filters: expected 3 segments, got %d", got)}// Test 2: Filter by species "Kiwi" - should count only Kiwi segments (2)state2 := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df1, df2})if got := state2.TotalSegments(); got != 2 {t.Errorf("Species=Kiwi: expected 2 segments, got %d", got)}// Test 3: Filter by species "Tomtit" - should count only Tomtit segments (1)state3 := NewClassifyState(ClassifyConfig{Species: "Tomtit", Certainty: -1}, []*utils.DataFile{df1, df2})if got := state3.TotalSegments(); got != 1 {t.Errorf("Species=Tomtit: expected 1 segment, got %d", got)}// Test 4: Filter by filter name "model-1.0" - should count all segments (3)state4 := NewClassifyState(ClassifyConfig{Filter: "model-1.0", Certainty: -1}, []*utils.DataFile{df1, df2})if got := state4.TotalSegments(); got != 3 {t.Errorf("Filter=model-1.0: expected 3 segments, got %d", got)}// Test 5: Filter by non-existent species - should count 0state5 := NewClassifyState(ClassifyConfig{Species: "NonExistent", Certainty: -1}, []*utils.DataFile{df1, df2})if got := state5.TotalSegments(); got != 0 {t.Errorf("Species=NonExistent: expected 0 segments, got %d", got)}// Test 6: Combined filter + speciesdf3 := &utils.DataFile{FilePath: "/test/file3.data",Segments: []*utils.Segment{{StartTime: 0,EndTime: 10,Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0", CallType: "Duet"},},},{StartTime: 10,EndTime: 20,Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-2.0", CallType: "Male"},},},},}state6 := NewClassifyState(ClassifyConfig{Filter: "model-1.0", Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df3})if got := state6.TotalSegments(); got != 1 {t.Errorf("Filter=model-1.0 + Species=Kiwi: expected 1 segment, got %d", got)}}func TestCurrentSegmentNumberWithFilters(t *testing.T) {// Create test data filesdf1 := &utils.DataFile{FilePath: "/test/file1.data",Segments: []*utils.Segment{{StartTime: 0,EndTime: 10,Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0"},},},{StartTime: 10,EndTime: 20,Labels: []*utils.Label{{Species: "Tomtit", Filter: "model-1.0"},},},},}df2 := &utils.DataFile{FilePath: "/test/file2.data",Segments: []*utils.Segment{{StartTime: 0,EndTime: 10,Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0"},},},},}// Test: Filter by species "Kiwi", at file 2, segment 0// Should report current segment as 2 (first Kiwi in df1 + first Kiwi in df2)state := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df1, df2})state.FileIdx = 1 // at df2state.SegmentIdx = 0if got := state.CurrentSegmentNumber(); got != 2 {t.Errorf("Species=Kiwi, at file 2, seg 0: expected current segment 2, got %d", got)}}func TestCertaintyFiltering(t *testing.T) {// 
Create test data files with different certainty levelsdf := &utils.DataFile{FilePath: "/test/file1.data",Segments: []*utils.Segment{{StartTime: 0,EndTime: 10,Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0", Certainty: 70},},},{StartTime: 10,EndTime: 20,Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0", Certainty: 100},},},{StartTime: 20,EndTime: 30,Labels: []*utils.Label{{Species: "Tomtit", Filter: "model-1.0", Certainty: 70},},},},}// Test 1: Filter by certainty 70 - should get 2 segmentsstate1 := NewClassifyState(ClassifyConfig{Certainty: 70}, []*utils.DataFile{df})if got := state1.TotalSegments(); got != 2 {t.Errorf("Certainty=70: expected 2 segments, got %d", got)}// Test 2: Filter by certainty 100 - should get 1 segmentstate2 := NewClassifyState(ClassifyConfig{Certainty: 100}, []*utils.DataFile{df})if got := state2.TotalSegments(); got != 1 {t.Errorf("Certainty=100: expected 1 segment, got %d", got)}// Test 3: Filter by certainty 0 - should get 0 segmentsstate3 := NewClassifyState(ClassifyConfig{Certainty: 0}, []*utils.DataFile{df})if got := state3.TotalSegments(); got != 0 {t.Errorf("Certainty=0: expected 0 segments, got %d", got)}// Test 4: Combined species + certaintystate4 := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: 70}, []*utils.DataFile{df})if got := state4.TotalSegments(); got != 1 {t.Errorf("Species=Kiwi + Certainty=70: expected 1 segment, got %d", got)}}func TestSampling(t *testing.T) {makeSegs := func(n int) []*utils.Segment {s := make([]*utils.Segment, n)for i := range s {s[i] = &utils.Segment{StartTime: float64(i), EndTime: float64(i + 1)}}return s}df1 := &utils.DataFile{FilePath: "/test/f1.data", Segments: makeSegs(6)}df2 := &utils.DataFile{FilePath: "/test/f2.data", Segments: makeSegs(4)}kept := []*utils.DataFile{df1, df2}cached := [][]*utils.Segment{df1.Segments, df2.Segments}countTotal := func(c [][]*utils.Segment) int {n := 0for _, s := range c {n += len(s)}return n}// 50% of 10 → 5k, c := applySampling(kept, cached, 50, rand.New(rand.NewSource(42)))if got := countTotal(c); got != 5 {t.Errorf("sample 50%%: expected 5, got %d", got)}// Files must be in original chronological orderfor i := 1; i < len(k); i++ {if k[i].FilePath < k[i-1].FilePath {t.Errorf("sample 50%%: files out of order at index %d", i)}}// 10% of 10 → 1_, c2 := applySampling(kept, cached, 10, rand.New(rand.NewSource(42)))if got := countTotal(c2); got != 1 {t.Errorf("sample 10%%: expected 1, got %d", got)}// 1% of 10 → clamp to 1_, c3 := applySampling(kept, cached, 1, rand.New(rand.NewSource(42)))if got := countTotal(c3); got != 1 {t.Errorf("sample 1%%: expected 1 (clamped), got %d", got)}// 99% of 10 → 9_, c4 := applySampling(kept, cached, 99, rand.New(rand.NewSource(42)))if got := countTotal(c4); got != 9 {t.Errorf("sample 99%%: expected 9, got %d", got)}}func TestCertaintyPruning(t *testing.T) {// Simulate the bug: first file has no matching certainty segmentsdf1 := &utils.DataFile{FilePath: "/test/file1.data",Segments: []*utils.Segment{{StartTime: 0,EndTime: 10,Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0", Certainty: 70},},},},}df2 := &utils.DataFile{FilePath: "/test/file2.data",Segments: []*utils.Segment{{StartTime: 0,EndTime: 10,Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0", Certainty: 100},},},},}// Without pruning (old bug): file1 is first, has no certainty=100 segments// CurrentSegment() would return nil even though TotalSegments() > 0state := NewClassifyState(ClassifyConfig{Certainty: 100}, []*utils.DataFile{df1, df2})// 
TotalSegments should be 1 (only file2 has certainty 100)
	if got := state.TotalSegments(); got != 1 {
		t.Errorf("Certainty=100: expected 1 segment, got %d", got)
	}
	// CurrentSegment should work if files are properly pruned.
	// Note: this test assumes LoadDataFiles does the pruning;
	// here we test the state after manual construction.
}

func TestCallTypeNoneFiltering(t *testing.T) {
	// Create test data: Kiwi with calltype, Kiwi without, Tomtit without
	df := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			{StartTime: 0, EndTime: 10, Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0", CallType: "Male"}}},
			{StartTime: 10, EndTime: 20, Labels: []*utils.Label{{Species: "Kiwi", Filter: "model-1.0"}}},   // no calltype
			{StartTime: 20, EndTime: 30, Labels: []*utils.Label{{Species: "Tomtit", Filter: "model-1.0"}}}, // no calltype, wrong species
		},
	}
	// Test 1: --species Kiwi+_ should match only Kiwi with no calltype (1 segment)
	state1 := NewClassifyState(ClassifyConfig{Species: "Kiwi", CallType: utils.CallTypeNone, Certainty: -1}, []*utils.DataFile{df})
	if got := state1.TotalSegments(); got != 1 {
		t.Errorf("Species=Kiwi+_: expected 1 segment, got %d", got)
	}
	// Test 2: --species Kiwi should still match all Kiwi (2 segments)
	state2 := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df})
	if got := state2.TotalSegments(); got != 2 {
		t.Errorf("Species=Kiwi: expected 2 segments, got %d", got)
	}
	// Test 3: --species Kiwi+Male should still work as before (1 segment)
	state3 := NewClassifyState(ClassifyConfig{Species: "Kiwi", CallType: "Male", Certainty: -1}, []*utils.DataFile{df})
	if got := state3.TotalSegments(); got != 1 {
		t.Errorf("Species=Kiwi+Male: expected 1 segment, got %d", got)
	}
}
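// TestFormatLabelsSketch is a hedged addition (not part of the original
// suite) pinning down the display format produced by FormatLabels in
// classify.go: species, "/calltype", " (N%)", then " [filter]".
func TestFormatLabelsSketch(t *testing.T) {
	labels := []*utils.Label{{Species: "Kiwi", CallType: "Duet", Certainty: 100, Filter: "model-1.0"}}
	if got, want := FormatLabels(labels, ""), "Kiwi/Duet (100%) [model-1.0]"; got != want {
		t.Errorf("FormatLabels = %q, want %q", got, want)
	}
}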
package tools

import (
	"fmt"
	"math/rand"
	"os"
	"path/filepath"
	"slices"
	"sort"
	"strings"
	"time"

	"skraak/utils"
)

// KeyBinding maps a key to a species/calltype
type KeyBinding struct {
	Key      string // single char: "k", "n", "p"
	Species  string // "Kiwi", "Don't Know", "Morepork"
	CallType string // "Duet", "Female", "Male" (optional)
}

// ClassifyConfig holds the configuration for classification
type ClassifyConfig struct {
	Folder    string
	File      string
	Filter    string
	Species   string // scope to this species (optional)
	CallType  string // scope to this calltype within species (optional)
	Certainty int    // scope to this certainty value, -1 = no filter (optional)
	Sample    int    // random sample percentage 1-99, -1 = no sampling, 100 = no-op
	Goto      string // goto this file on startup (optional, basename match)
	Reviewer  string
	Color     bool
	ImageSize int // spectrogram display size in pixels (0 = default)
	Sixel     bool
	ITerm     bool
	Bindings  []KeyBinding
	// SecondaryBindings maps a primary binding key to per-species calltype
	// keys. Invoked via Shift+primary-key: the species is labeled without
	// advancing, and the next key is interpreted as a calltype.
	SecondaryBindings map[string]map[string]string
	Night             bool
	Day               bool
	Lat               float64
	Lng               float64
	Timezone          string
}

// ClassifyState holds the current state for TUI
type ClassifyState struct {
	Config            ClassifyConfig
	DataFiles         []*utils.DataFile
	filteredSegs      [][]*utils.Segment // cached at load time, parallel to DataFiles
	totalSegs         int                // pre-computed total segment count
	FileIdx           int
	SegmentIdx        int
	Dirty             bool
	Player            *utils.AudioPlayer
	PlaybackSpeed     float64 // Current playback speed (1.0 = normal, 0.5 = half speed)
	TimeFilteredCount int     // files skipped by --night or --day filter
}

// BindingResult represents parsed key result
type BindingResult struct {
	Species  string
	CallType string // empty string = remove calltype
}

// findDataFilePaths resolves the list of .data file paths from config.
func findDataFilePaths(config ClassifyConfig) ([]string, error) {
	if config.File != "" {
		return []string{config.File}, nil
	}
	paths, err := utils.FindDataFiles(config.Folder)
	if err != nil {
		return nil, fmt.Errorf("find data files: %w", err)
	}
	return paths, nil
}

// filterDataFileSegments applies segment and day/night filters to a single data file.
// Returns the filtered segments and whether the file should be kept.
// If the file is filtered out (no matching segments, or time-of-day), returns nil, false.
func filterDataFileSegments(df *utils.DataFile, config ClassifyConfig) ([]*utils.Segment, bool, int) {
	segs := filterSegmentsByLabel(df.Segments, config)
	if segs == nil {
		return nil, false, 0
	}
	timeFiltered := 0
	if config.Night || config.Day {
		keep, tf := filterByTimeOfDay(df.FilePath, config)
		if !keep {
			return nil, false, tf
		}
	}
	return segs, true, timeFiltered
}

// parseAndSortDataFiles finds, parses, and sorts .data files from the config.
func parseAndSortDataFiles(config ClassifyConfig) ([]*utils.DataFile, error) {
	filePaths, err := findDataFilePaths(config)
	if err != nil {
		return nil, err
	}
	if len(filePaths) == 0 {
		return nil, fmt.Errorf("no .data files found")
	}
	var dataFiles []*utils.DataFile
	for _, path := range filePaths {
		df, err := utils.ParseDataFile(path)
		if err != nil {
			continue
		}
		dataFiles = append(dataFiles, df)
	}
	if len(dataFiles) == 0 {
		return nil, fmt.Errorf("no valid .data files")
	}
	sort.Slice(dataFiles, func(i, j int) bool {
		return dataFiles[i].FilePath < dataFiles[j].FilePath
	})
	return dataFiles, nil
}

// filterDataFiles applies segment filters to each data file, returning kept files and their segments.
func filterDataFiles(dataFiles []*utils.DataFile, config ClassifyConfig) ([]*utils.DataFile, [][]*utils.Segment, int) {
	var kept []*utils.DataFile
	var cachedSegs [][]*utils.Segment
	var timeFiltered int
	for _, df := range dataFiles {
		segs, keep, tf := filterDataFileSegments(df, config)
		timeFiltered += tf
		if !keep {
			continue
		}
		kept = append(kept, df)
		cachedSegs = append(cachedSegs, segs)
	}
	return kept, cachedSegs, timeFiltered
}

// buildClassifyState constructs the ClassifyState, handling --goto file positioning.
func buildClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile, filteredSegs [][]*utils.Segment, timeFiltered int) (*ClassifyState, error) {
	total := 0
	for _, segs := range filteredSegs {
		total += len(segs)
	}
	state := &ClassifyState{
		Config:            config,
		DataFiles:         dataFiles,
		filteredSegs:      filteredSegs,
		totalSegs:         total,
		TimeFilteredCount: timeFiltered,
	}
	if config.Goto == "" {
		return state, nil
	}
	for i, df := range state.DataFiles {
		base := df.FilePath[strings.LastIndex(df.FilePath, "/")+1:]
		if base == config.Goto {
			state.FileIdx = i
			return state, nil
		}
	}
	return nil, fmt.Errorf("goto file not found (or has no matching segments): %s", config.Goto)
}

// applySampling randomly selects sample% of segments from the filtered set.
// The returned files and segments preserve the original chronological order.
func applySampling(kept []*utils.DataFile, cachedSegs [][]*utils.Segment, sample int, rng *rand.Rand) ([]*utils.DataFile, [][]*utils.Segment) {
	flat := make([]struct{ fileIdx, segIdx int }, 0)
	for fi, segs := range cachedSegs {
		for si := range segs {
			flat = append(flat, struct{ fileIdx, segIdx int }{fi, si})
		}
	}
	targetCount := max(len(flat)*sample/100, 1)
	rng.Shuffle(len(flat), func(i, j int) { flat[i], flat[j] = flat[j], flat[i] })
	selected := flat[:targetCount]
	// Restore chronological order before rebuilding
	sort.Slice(selected, func(i, j int) bool {
		if selected[i].fileIdx != selected[j].fileIdx {
			return selected[i].fileIdx < selected[j].fileIdx
		}
		return selected[i].segIdx < selected[j].segIdx
	})
	newCached := make([][]*utils.Segment, len(cachedSegs))
	for _, ref := range selected {
		newCached[ref.fileIdx] = append(newCached[ref.fileIdx], cachedSegs[ref.fileIdx][ref.segIdx])
	}
	var newKept []*utils.DataFile
	var finalCached [][]*utils.Segment
	for i, segs := range newCached {
		if len(segs) > 0 {
			newKept = append(newKept, kept[i])
			finalCached = append(finalCached, segs)
		}
	}
	return newKept, finalCached
}

// FilteredSegs returns the cached filtered segments parallel to DataFiles.
func (s *ClassifyState) FilteredSegs() [][]*utils.Segment {
	return s.filteredSegs
}

// CurrentFile returns the current data file
func (s *ClassifyState) CurrentFile() *utils.DataFile {
	if s.FileIdx >= len(s.DataFiles) {
		return nil
	}
	return s.DataFiles[s.FileIdx]
}

// CurrentSegment returns the current segment
func (s *ClassifyState) CurrentSegment() *utils.Segment {
	if s.FileIdx >= len(s.filteredSegs) {
		return nil
	}
	segs := s.filteredSegs[s.FileIdx]
	if s.SegmentIdx >= len(segs) {
		return nil
	}
	return segs[s.SegmentIdx]
}

// TotalSegments returns total segments to review
func (s *ClassifyState) TotalSegments() int {
	return s.totalSegs
}

// CurrentSegmentNumber returns 1-based segment number
func (s *ClassifyState) CurrentSegmentNumber() int {
	count := 0
	for i := 0; i < s.FileIdx; i++ {
		count += len(s.filteredSegs[i])
	}
	return count + s.SegmentIdx + 1
}

// NextSegment moves to the next segment, returns false if at end
func (s *ClassifyState) NextSegment() bool {
	if s.FileIdx >= len(s.filteredSegs) {
		return false
	}
	segs := s.filteredSegs[s.FileIdx]
	if s.SegmentIdx+1 < len(segs) {
		s.SegmentIdx++
		return true
	}
	// Move to next file
	if s.FileIdx+1 < len(s.DataFiles) {
		s.FileIdx++
		s.SegmentIdx = 0
		return true
	}
	return false
}

// PrevSegment moves to the previous segment, returns false if at start
func (s *ClassifyState) PrevSegment() bool {
	if s.SegmentIdx > 0 {
		s.SegmentIdx--
		return true
	}
	// Move to previous file
	if s.FileIdx > 0 {
		s.FileIdx--
		segs := s.filteredSegs[s.FileIdx]
		s.SegmentIdx = max(len(segs)-1, 0)
		return true
	}
	return false
}

// ParseKeyBuffer parses a single key into binding result
func (s *ClassifyState) ParseKeyBuffer(key string) *BindingResult {
	for _, b := range s.Config.Bindings {
		if b.Key ==
key {return &BindingResult{Species: b.Species,CallType: b.CallType,}}}return nil}// SetComment sets the comment on the current segment's filter label.// Returns the previous comment (for undo) or empty string if none.func (s *ClassifyState) SetComment(comment string) string {seg := s.CurrentSegment()if seg == nil {return ""}df := s.CurrentFile()if df == nil {return ""}// Set reviewerdf.Meta.Reviewer = s.Config.Reviewer// Get labels matching filterfilterLabels := seg.GetFilterLabels(s.Config.Filter)var oldComment stringif len(filterLabels) == 0 {// No matching labels, add new one with commentlabel := &utils.Label{Species: "Don't Know",Certainty: 0,Filter: s.Config.Filter,Comment: comment,}seg.Labels = append(seg.Labels, label)} else {// Set comment on first matching labeloldComment = filterLabels[0].CommentfilterLabels[0].Comment = comment}s.Dirty = truereturn oldComment}// GetCurrentComment returns the comment on the current segment's filter label.func (s *ClassifyState) GetCurrentComment() string {seg := s.CurrentSegment()if seg == nil {return ""}filterLabels := seg.GetFilterLabels(s.Config.Filter)if len(filterLabels) == 0 {return ""}return filterLabels[0].Comment}// ApplyBinding applies a binding result to the current segmentfunc (s *ClassifyState) ApplyBinding(result *BindingResult) {seg := s.CurrentSegment()if seg == nil {return}df := s.CurrentFile()if df == nil {return}// Set reviewerdf.Meta.Reviewer = s.Config.Reviewer// Get labels matching filterfilterLabels := seg.GetFilterLabels(s.Config.Filter)// Determine certainty: 0 for Don't Know, 100 for otherscertainty := 100if result.Species == "Don't Know" {certainty = 0}if len(filterLabels) == 0 {// No matching labels, add new oneseg.Labels = append(seg.Labels, &utils.Label{Species: result.Species,Certainty: certainty,Filter: s.Config.Filter,CallType: result.CallType,})} else {// Edit first matching label, remove restfilterLabels[0].Species = result.SpeciesfilterLabels[0].Certainty = certaintyfilterLabels[0].CallType = result.CallType // always set (empty = remove)// Remove extra matching labelsif len(filterLabels) > 1 {var newLabels []*utils.Labelfor _, l := range seg.Labels {keep := !slices.Contains(filterLabels[1:], l)if keep {newLabels = append(newLabels, l)}}seg.Labels = newLabels}}// Re-sort labelssort.Slice(seg.Labels, func(i, j int) bool {return seg.Labels[i].Species < seg.Labels[j].Species})s.Dirty = true}// ApplyCallTypeOnly sets the CallType on the current segment's first// filter-matching label. Used after a Shift+primary keypress labeled the// species and we now receive the secondary key for the calltype.// No-op if there is no matching label to update.func (s *ClassifyState) ApplyCallTypeOnly(callType string) {seg := s.CurrentSegment()if seg == nil {return}df := s.CurrentFile()if df == nil {return}filterLabels := seg.GetFilterLabels(s.Config.Filter)if len(filterLabels) == 0 {return}df.Meta.Reviewer = s.Config.ReviewerfilterLabels[0].CallType = callTypes.Dirty = true}// HasSecondary reports whether the given primary key has any secondary// (calltype) bindings configured.func (s *ClassifyState) HasSecondary(primaryKey string) bool {return len(s.Config.SecondaryBindings[primaryKey]) > 0}// ConfirmLabel upgrades the current segment's existing filter label certainty// to 100. 
Returns true if a write is needed (label existed and was below 100).// Returns false for Don't Know (certainty=0) — confirming a Don't Know is a no-op;// the caller should just advance to the next segment.func (s *ClassifyState) ConfirmLabel() bool {seg := s.CurrentSegment()if seg == nil {return false}filterLabels := seg.GetFilterLabels(s.Config.Filter)if len(filterLabels) == 0 {return false}if filterLabels[0].Certainty == 0 {return false}if filterLabels[0].Certainty == 100 {return false}df := s.CurrentFile()if df == nil {return false}df.Meta.Reviewer = s.Config.ReviewerfilterLabels[0].Certainty = 100s.Dirty = truereturn true}// Save saves the current filefunc (s *ClassifyState) Save() error {df := s.CurrentFile()if df == nil {return nil}if !s.Dirty {return nil}err := df.Write(df.FilePath)if err != nil {return err}s.Dirty = falsereturn nil}// getFilterLabel returns the label matching the current filter, or first label if no filter.func (s *ClassifyState) getFilterLabel(seg *utils.Segment) *utils.Label {if s.Config.Filter == "" {if len(seg.Labels) > 0 {return seg.Labels[0]}return nil}for _, label := range seg.Labels {if label.Filter == s.Config.Filter {return label}}return nil}// getOrCreateFilterLabel gets existing label or creates new one for the current filter.func (s *ClassifyState) getOrCreateFilterLabel(seg *utils.Segment) *utils.Label {label := s.getFilterLabel(seg)if label != nil {return label}// Create new labellabel = &utils.Label{Species: "Don't Know",Certainty: 0,Filter: s.Config.Filter,}seg.Labels = append(seg.Labels, label)s.Dirty = truereturn label}// HasBookmark returns true if current segment has a bookmark on the filter label.func (s *ClassifyState) HasBookmark() bool {seg := s.CurrentSegment()if seg == nil {return false}label := s.getFilterLabel(seg)return label != nil && label.Bookmark}// ToggleBookmark toggles the bookmark on the current segment's filter label.func (s *ClassifyState) ToggleBookmark() {seg := s.CurrentSegment()if seg == nil {return}df := s.CurrentFile()if df == nil {return}// Set reviewerdf.Meta.Reviewer = s.Config.Reviewerlabel := s.getOrCreateFilterLabel(seg)label.Bookmark = !label.Bookmarks.Dirty = true}// NextBookmark navigates to the next bookmark, wrapping around if needed.// Returns false if no bookmarks found (back at start position).func (s *ClassifyState) NextBookmark() bool {startFile := s.FileIdxstartSeg := s.SegmentIdxfirst := truefor {// Advance to next segmentif !s.NextSegment() {// Wrap to start of folders.FileIdx = 0s.SegmentIdx = 0}// Check if we've looped back to startif !first && s.FileIdx == startFile && s.SegmentIdx == startSeg {return false // full circle, no bookmark found}first = false// Check if current segment has bookmarkif s.hasFilterBookmark() {return true}}}// PrevBookmark navigates to the previous bookmark, wrapping around if needed.// Returns false if no bookmarks found (back at start position).func (s *ClassifyState) PrevBookmark() bool {startFile := s.FileIdxstartSeg := s.SegmentIdxfirst := truefor {// Move to previous segmentif !s.PrevSegment() {// Wrap to end of folders.FileIdx = len(s.DataFiles) - 1segs := s.filteredSegs[s.FileIdx]s.SegmentIdx = max(len(segs)-1, 0)}// Check if we've looped back to startif !first && s.FileIdx == startFile && s.SegmentIdx == startSeg {return false // full circle, no bookmark found}first = false// Check if current segment has bookmarkif s.hasFilterBookmark() {return true}}}// hasFilterBookmark checks if current segment has bookmark on filter-matching label.func (s *ClassifyState) 
hasFilterBookmark() bool {
	seg := s.CurrentSegment()
	if seg == nil {
		return false
	}
	label := s.getFilterLabel(seg)
	return label != nil && label.Bookmark
}

// FormatLabels formats labels for display
func FormatLabels(labels []*utils.Label, filter string) string {
	var parts []string
	for _, l := range labels {
		if filter != "" && l.Filter != filter {
			continue
		}
		part := l.Species
		if l.CallType != "" {
			part += "/" + l.CallType
		}
		part += fmt.Sprintf(" (%d%%)", l.Certainty)
		if l.Filter != "" {
			part += " [" + l.Filter + "]"
		}
		if l.Comment != "" {
			part += fmt.Sprintf(" \"%s\"", l.Comment)
		}
		parts = append(parts, part)
	}
	return strings.Join(parts, ", ")
}

// LoadDataFiles loads all .data files for classification.
func LoadDataFiles(config ClassifyConfig) (*ClassifyState, error) {
	dataFiles, err := parseAndSortDataFiles(config)
	if err != nil {
		return nil, err
	}
	kept, cachedSegs, timeFiltered := filterDataFiles(dataFiles, config)
	if config.Sample > 0 && config.Sample < 100 {
		rng := rand.New(rand.NewSource(time.Now().UnixNano()))
		kept, cachedSegs = applySampling(kept, cachedSegs, config.Sample, rng)
	}
	return buildClassifyState(config, kept, cachedSegs, timeFiltered)
}

// filterByTimeOfDay checks --night/--day time-of-day filter for a .data file.
// Returns (keep, timeFilteredCount).
func filterByTimeOfDay(dataFilePath string, config ClassifyConfig) (bool, int) {
	wavPath := filepath.Clean(strings.TrimSuffix(dataFilePath, ".data"))
	result, err := IsNight(IsNightInput{
		FilePath: wavPath,
		Lat:      config.Lat,
		Lng:      config.Lng,
		Timezone: config.Timezone,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
		return false, 1
	}
	if config.Night && !result.SolarNight {
		return false, 1
	}
	if config.Day && !result.DiurnalActive {
		return false, 1
	}
	return true, 0
}

// filterSegmentsByLabel applies label/species/certainty filters, returning matching segments.
// Returns nil if no segments match (caller should skip the file).
func filterSegmentsByLabel(segments []*utils.Segment, config ClassifyConfig) []*utils.Segment {
	hasFilter := config.Filter != "" || config.Species != "" || config.Certainty >= 0
	if !hasFilter {
		return segments
	}
	var segs []*utils.Segment
	for _, seg := range segments {
		if seg.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
			segs = append(segs, seg)
		}
	}
	return segs // nil if empty, caller treats as "skip"
}
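// Illustrative configuration (a sketch, not from the original source) showing
// how SecondaryBindings pairs with Bindings: Shift+K labels Kiwi without
// advancing, and the next keypress ("m", "f", or "d") is read as the
// calltype for that label via ApplyCallTypeOnly.
//
//	cfg := ClassifyConfig{
//		Certainty: -1,
//		Bindings:  []KeyBinding{{Key: "k", Species: "Kiwi"}},
//		SecondaryBindings: map[string]map[string]string{
//			"k": {"m": "Male", "f": "Female", "d": "Duet"},
//		},
//	}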
package tools

import (
	"context"
	"database/sql"
	"encoding/csv"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"skraak/db"
	"skraak/utils"
)

// BulkFileImportInput defines the input parameters for the bulk_file_import tool
type BulkFileImportInput struct {
	DBPath      string `json:"db_path"`
	DatasetID   string `json:"dataset_id"`
	CSVPath     string `json:"csv_path"`
	LogFilePath string `json:"log_file_path"`
}

// BulkFileImportOutput defines the output structure for the bulk_file_import tool
type BulkFileImportOutput struct {
	TotalLocations    int      `json:"total_locations"`
	ClustersCreated   int      `json:"clusters_created"`
	ClustersExisting  int      `json:"clusters_existing"`
	TotalFilesScanned int      `json:"total_files_scanned"`
	FilesImported     int      `json:"files_imported"`
	FilesDuplicate    int      `json:"files_duplicate"`
	FilesError        int      `json:"files_error"`
	ProcessingTime    string   `json:"processing_time"`
	Errors            []string `json:"errors,omitempty"`
}

// failOutput sets error details and processing time on the output before returning.
func (o *BulkFileImportOutput) failOutput(errs []string, startTime time.Time) {
	o.Errors = errs
	o.ProcessingTime = time.Since(startTime).String()
}

// bulkLocationData holds CSV row data for a location
type bulkLocationData struct {
	LocationName  string
	LocationID    string
	DirectoryPath string
	DateRange     string
	SampleRate    int
	FileCount     int
}

// bulkImportStats tracks import statistics for a single cluster
type bulkImportStats struct {
	TotalFiles     int
	ImportedFiles  int
	DuplicateFiles int
	ErrorFiles     int
}

// progressLogger handles writing to both log file and internal buffer
type progressLogger struct {
	file   *os.File
	buffer *strings.Builder
}

// Log writes a formatted message with timestamp to both log file and buffer
func (l *progressLogger) Log(format string, args ...any) {
	timestamp := time.Now().Format("2006-01-02 15:04:05")
	message := fmt.Sprintf(format, args...)
	line := fmt.Sprintf("[%s] %s\n", timestamp, message)
	// Write to file; log write failures are non-fatal for import progress
	if _, err := l.file.WriteString(line); err != nil {
		fmt.Fprintf(os.Stderr, "Warning: log write failed: %v\n", err)
	}
	if err := l.file.Sync(); err != nil {
		fmt.Fprintf(os.Stderr, "Warning: log sync failed: %v\n", err)
	}
	// Also keep in memory for potential error reporting
	l.buffer.WriteString(line)
}

// BulkFileImport imports WAV files across multiple locations using CSV specification
func BulkFileImport(ctx context.Context, input BulkFileImportInput) (BulkFileImportOutput, error) {
	startTime := time.Now()
	var output BulkFileImportOutput
	// Open log file
	logFile, err := os.OpenFile(input.LogFilePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
	if err != nil {
		return output, fmt.Errorf("failed to open log file: %w", err)
	}
	defer func() { _ = logFile.Close() }()
	logger := &progressLogger{
		file:   logFile,
		buffer: &strings.Builder{},
	}
	logger.Log("Starting bulk file import for dataset %s", input.DatasetID)

	// Phase 0: Validate input
	logger.Log("Validating input parameters...")
	if err := bulkValidateInput(input); err != nil {
		logger.Log("ERROR: Validation failed: %v", err)
		output.failOutput([]string{fmt.Sprintf("validation failed: %v", err)}, startTime)
		return output, fmt.Errorf("validation failed: %w", err)
	}
	logger.Log("Validation complete")

	// Phase 1: Read CSV
	logger.Log("Reading CSV file: %s", input.CSVPath)
	locations, err := bulkReadCSV(input.CSVPath)
	if err != nil {
		logger.Log("ERROR: Failed to read CSV: %v", err)
		output.failOutput([]string{fmt.Sprintf("failed to read CSV: %v", err)}, startTime)
		return output, fmt.Errorf("failed to read CSV: %w", err)
	}
	logger.Log("Loaded %d locations from CSV", len(locations))
	output.TotalLocations = len(locations)

	// Phase 1.5: Validate all location_ids belong to the dataset
	logger.Log("Validating location_ids belong to dataset...")
	if err := bulkValidateLocations(logger, locations, input.DatasetID, resolveDBPath(input.DBPath)); err != nil {
		output.failOutput([]string{err.Error()}, startTime)
		return output, err
	}
	logger.Log("Location validation complete")

	// Phase 2: Create/Validate Clusters
	logger.Log("=== Phase 1: Creating/Validating Clusters ===")
	database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
	if err != nil {
		logger.Log("ERROR: Failed to open database: %v", err)
		output.failOutput([]string{fmt.Sprintf("failed to open database: %v", err)}, startTime)
		return output, fmt.Errorf("failed to open database: %w", err)
	}
	defer database.Close()
	clusterIDMap, created, existing, err := bulkCreateClusters(ctx, database, logger, locations, input.DatasetID)
	if err != nil {
		output.failOutput(output.Errors, startTime)
		return output, err
	}
	output.ClustersCreated = created
	output.ClustersExisting = existing

	// Phase 3: Import files
	logger.Log("=== Phase 2: Importing Files ===")
	fileStats, errs := bulkImportAllFiles(database, logger, locations, clusterIDMap, input.DatasetID)
	output.TotalFilesScanned = fileStats.TotalFiles
	output.FilesImported = fileStats.ImportedFiles
	output.FilesDuplicate = fileStats.DuplicateFiles
	output.FilesError = fileStats.ErrorFiles
	output.Errors = append(output.Errors, errs...)
	if len(errs) > 0 {
		output.ProcessingTime = time.Since(startTime).String()
		return output, fmt.Errorf("failed to import files: %s", errs[0])
	}
	logger.Log("=== Import Complete ===")
	logger.Log("Total files scanned: %d", fileStats.TotalFiles)
	logger.Log("Files imported: %d", fileStats.ImportedFiles)
	logger.Log("Duplicates skipped: %d", fileStats.DuplicateFiles)
	logger.Log("Errors: %d", fileStats.ErrorFiles)
	logger.Log("Processing time: %s", time.Since(startTime).Round(time.Second))
	output.ProcessingTime = time.Since(startTime).String()
	return output, nil
}

// bulkValidateInput validates input parameters
func bulkValidateInput(input BulkFileImportInput) error {
	// Validate ID format first (fast fail before DB queries)
	if err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {
		return err
	}
	// Verify CSV file exists
	if _, err := os.Stat(input.CSVPath); err != nil {
		return fmt.Errorf("CSV file not accessible: %w", err)
	}
	// Verify log file path is writable
	logDir := filepath.Dir(input.LogFilePath)
	if _, err := os.Stat(logDir); err != nil {
		return fmt.Errorf("log file directory not accessible: %w", err)
	}
	// Open database for validation queries
	database, err := db.OpenReadOnlyDB(resolveDBPath(input.DBPath))
	if err != nil {
		return fmt.Errorf("failed to open database: %w", err)
	}
	defer database.Close()
	// Verify dataset exists and is structured
	if err := db.ValidateDatasetTypeForImport(database, input.DatasetID); err != nil {
		return err
	}
	return nil
}

// bulkValidateLocations validates that all location_ids in the CSV belong to the dataset.
// Returns an error if validation fails.
func bulkValidateLocations(logger *progressLogger, locations []bulkLocationData, datasetID string, dbPath string) error {
	readDB, err := db.OpenReadOnlyDB(dbPath)
	if err != nil {
		logger.Log("ERROR: Failed to open database: %v", err)
		return fmt.Errorf("failed to open database: %w", err)
	}
	locationErrors := bulkValidateLocationsBelongToDataset(readDB, locations, datasetID)
	readDB.Close()
	if len(locationErrors) > 0 {
		for _, locErr := range locationErrors {
			logger.Log("ERROR: %s", locErr)
		}
		return fmt.Errorf("location validation failed: %d location(s) do not belong to dataset %s", len(locationErrors), datasetID)
	}
	return nil
}

// bulkValidateLocationsBelongToDataset validates that all unique location_ids in the CSV belong to the dataset
func bulkValidateLocationsBelongToDataset(dbConn *sql.DB, locations []bulkLocationData, datasetID string) []string {
	var errors []string
	// Collect unique location_ids
	uniqueLocations := make(map[string]bool)
	for _, loc := range locations {
		uniqueLocations[loc.LocationID] = true
	}
	// Validate each unique location_id
	for locationID := range uniqueLocations {
		if err := db.ValidateLocationBelongsToDataset(dbConn, locationID, datasetID); err != nil {
			errors = append(errors, err.Error())
		}
	}
	return errors
}

// bulkCreateClusters creates or validates clusters for all locations.
// Returns the cluster ID map, counts of created/existing clusters, and any error.
func bulkCreateClusters(ctx context.Context, database *sql.DB, logger *progressLogger, locations []bulkLocationData, datasetID string) (map[string]string, int, int, error) {
	clusterIDMap := make(map[string]string)
	created := 0
	existing := 0
	for i, loc := range locations {
		logger.Log("[%d/%d] Processing location: %s", i+1, len(locations), loc.LocationName)
		var existingClusterID string
		err := database.QueryRow(`SELECT id FROM cluster
			WHERE location_id = ? AND name = ? AND active = true`, loc.LocationID, loc.DateRange).Scan(&existingClusterID)
		var clusterID string
		if err == sql.ErrNoRows {
			clusterID, err = bulkCreateCluster(ctx, database, datasetID, loc.LocationID, loc.DateRange, loc.SampleRate)
			if err != nil {
				logger.Log("ERROR: Failed to create cluster for location %s: %v", loc.LocationName, err)
				return nil, 0, 0, fmt.Errorf("failed to create cluster: %w", err)
			}
			logger.Log("  Created cluster: %s", clusterID)
			created++
		} else if err != nil {
			logger.Log("ERROR: Failed to check cluster for location %s: %v", loc.LocationName, err)
			return nil, 0, 0, fmt.Errorf("failed to check cluster: %w", err)
		} else {
			clusterID = existingClusterID
			logger.Log("  Using existing cluster: %s", clusterID)
			existing++
		}
		compositeKey := loc.LocationID + "|" + loc.DateRange
		clusterIDMap[compositeKey] = clusterID
	}
	return clusterIDMap, created, existing, nil
}

// bulkImportAllFiles imports files for all locations using the cluster ID map.
// Returns aggregate stats and any error messages.
func bulkImportAllFiles(database *sql.DB, logger *progressLogger, locations []bulkLocationData, clusterIDMap map[string]string, datasetID string) (bulkImportStats, []string) {
	var total bulkImportStats
	var errs []string
	for i, loc := range locations {
		compositeKey := loc.LocationID + "|" + loc.DateRange
		clusterID, ok := clusterIDMap[compositeKey]
		if !ok {
			continue
		}
		logger.Log("[%d/%d] Importing files for: %s", i+1, len(locations), loc.LocationName)
		logger.Log("  Directory: %s", loc.DirectoryPath)
		if _, err := os.Stat(loc.DirectoryPath); os.IsNotExist(err) {
			logger.Log("  WARNING: Directory not found, skipping")
			continue
		}
		stats, err := bulkImportFilesForCluster(database, logger, loc.DirectoryPath, datasetID, loc.LocationID, clusterID)
		if err != nil {
			errMsg := fmt.Sprintf("Failed to import files for location %s: %v", loc.LocationName, err)
			logger.Log("ERROR: %s", errMsg)
			return total, []string{errMsg}
		}
		logger.Log("  Scanned: %d files", stats.TotalFiles)
		logger.Log("  Imported: %d, Duplicates: %d", stats.ImportedFiles, stats.DuplicateFiles)
		if stats.ErrorFiles > 0 {
			logger.Log("  Errors: %d files", stats.ErrorFiles)
		}
		total.TotalFiles += stats.TotalFiles
		total.ImportedFiles += stats.ImportedFiles
		total.DuplicateFiles += stats.DuplicateFiles
		total.ErrorFiles += stats.ErrorFiles
	}
	return total, errs
}

func bulkReadCSV(path string) ([]bulkLocationData, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer func() { _ = file.Close() }()
	reader := csv.NewReader(file)
	records, err := reader.ReadAll()
	if err != nil {
		return nil, err
	}
	if len(records) == 0 {
		return nil, fmt.Errorf("CSV file is empty")
	}
	var locations []bulkLocationData
	for i, record := range records {
		if i == 0 {
			continue // Skip header
		}
		if len(record) < 6 {
			return nil, fmt.Errorf("CSV row %d has insufficient columns (expected 6, got %d)", i+1, len(record))
		}
		// Validate required string fields are non-empty
		locationName := strings.TrimSpace(record[0])
		if locationName == "" {
			return nil, fmt.Errorf("empty location_name in row %d", i+1)
		}
		directoryPath := strings.TrimSpace(record[2])
		if directoryPath == "" {
			return nil, fmt.Errorf("empty directory_path in row %d", i+1)
		}
		dateRange := strings.TrimSpace(record[3])
		if dateRange == "" {
			return nil, fmt.Errorf("empty date_range in row %d", i+1)
		}
		// Validate location_id format
		locationID := record[1]
		if err := utils.ValidateShortID(locationID, "location_id"); err != nil {
			return nil, fmt.Errorf("invalid location_id in row %d: %v", i+1, err)
		}
		sampleRate, err := strconv.Atoi(record[4])
		if err != nil {
			return nil, fmt.Errorf("invalid sample_rate in row %d: %v", i+1, err)
		}
		// Validate sample rate is in reasonable range
		if err := utils.ValidateSampleRate(sampleRate); err != nil {
			return nil, fmt.Errorf("invalid sample_rate in row %d: %v", i+1, err)
		}
		fileCount, err := strconv.Atoi(record[5])
		if err != nil {
			return nil, fmt.Errorf("invalid file_count in row %d: %v", i+1, err)
		}
		locations = append(locations, bulkLocationData{
			LocationName:  locationName,
			LocationID:    locationID,
			DirectoryPath: directoryPath,
			DateRange:     dateRange,
			SampleRate:    sampleRate,
			FileCount:     fileCount,
		})
	}
	return locations, nil
}

// bulkCreateCluster creates a new cluster in the database
func bulkCreateCluster(ctx context.Context, database *sql.DB, datasetID, locationID, name string, sampleRate int) (string, error) {
	// Generate a 12-character nanoid
	clusterID, err := utils.GenerateShortID()
	if err != nil {
		return "", fmt.Errorf("failed to generate cluster ID: %v", err)
	}
	now := time.Now().UTC()
	// Get location name for the path
	var locationName string
	err = database.QueryRow("SELECT name FROM location WHERE id = ?", locationID).Scan(&locationName)
	if err != nil {
		return "", fmt.Errorf("failed to get location name: %v", err)
	}
	// Normalize path: replace spaces and special characters
	path := strings.ReplaceAll(locationName, " ", "_")
	path = strings.ReplaceAll(path, "/", "_")
	tx, err := db.BeginLoggedTx(ctx, database, "bulk_file_import")
	if err != nil {
		return "", fmt.Errorf("failed to begin transaction: %w", err)
	}
	defer tx.Rollback()
	_, err = tx.ExecContext(ctx, `INSERT INTO cluster (id, dataset_id, location_id, name, path, sample_rate, active, created_at, last_modified)
		VALUES (?, ?, ?, ?, ?, ?, true, ?, ?)`, clusterID, datasetID, locationID, name, path, sampleRate, now, now)
	if err != nil {
		return "", fmt.Errorf("failed to insert cluster: %w", err)
	}
	if err = tx.Commit(); err != nil {
		return "", fmt.Errorf("failed to commit cluster creation: %w", err)
	}
	return clusterID, nil
}

// bulkImportFilesForCluster imports all WAV files for a single cluster
func bulkImportFilesForCluster(database *sql.DB, logger *progressLogger, folderPath, datasetID, locationID, clusterID string) (*bulkImportStats, error) {
	stats := &bulkImportStats{}
	// Check if directory exists
	if _, err := os.Stat(folderPath); os.IsNotExist(err) {
		logger.Log("  WARNING: Directory not found, skipping")
		return stats, nil
	}
	// Import the cluster (SAME LOGIC AS import_files.go)
	logger.Log("  Importing cluster %s", clusterID)
	ctx := context.Background()
	tx, err := db.BeginLoggedTx(ctx, database, "import_audio_files")
	if err != nil {
		return nil, fmt.Errorf("failed to begin transaction: %w", err)
	}
	defer tx.Rollback()
	clusterOutput, err := utils.ImportCluster(database, tx.UnderlyingTx(), utils.ClusterImportInput{
		FolderPath: folderPath,
		DatasetID:  datasetID,
		LocationID: locationID,
		ClusterID:  clusterID,
		Recursive:  true,
	})
	if err != nil {
		return nil, err
	}
	if err := tx.Commit(); err != nil {
		return nil, fmt.Errorf("transaction commit failed: %w", err)
	}
	// Map to bulk import stats
	stats.TotalFiles = clusterOutput.TotalFiles
	stats.ImportedFiles = clusterOutput.ImportedFiles
	stats.DuplicateFiles = clusterOutput.SkippedFiles
	stats.ErrorFiles = clusterOutput.FailedFiles
	// Log errors
	for i, fileErr := range clusterOutput.Errors {
		if i < 5 { // Log first 5
			logger.Log("  ERROR: %s: %s", fileErr.FileName, fileErr.Error)
		}
	}
	logger.Log("  Complete: %d imported, %d duplicates, %d errors", stats.ImportedFiles, stats.DuplicateFiles, stats.ErrorFiles)
	return stats, nil
}
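// Illustrative CSV layout for bulkReadCSV (values are examples only; the
// header row is skipped and columns are positional):
//
//	location_name,location_id,directory_path,date_range,sample_rate,file_count
//	North Ridge,abc123def456,/recordings/north_ridge/2023-10,2023-10,16000,744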
// Package-level variable to store database path// Deprecated: use Input.DBPath instead. Will be removed after all callers are migrated.var dbPath string// SetDBPath sets the database path for the tools package// Deprecated: use Input.DBPath instead. Will be removed after all callers are migrated.func SetDBPath(path string) {dbPath = path}
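// Migration sketch (assumed usage, mirroring the deprecation note above):
// new callers should populate the input field rather than the global, e.g.
//
//	out, err := BulkFileImport(ctx, BulkFileImportInput{
//		DBPath:      "/data/skraak.db",
//		DatasetID:   "abc123def456",
//		CSVPath:     "/data/import.csv",
//		LogFilePath: "/data/import.log",
//	})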
// resolveDBPath returns the DBPath from the input if set, otherwise falls back
// to the package-level dbPath. This supports the incremental migration from
// the global variable to explicit input fields.
func resolveDBPath(inputPath string) string {
	if inputPath != "" {
		return inputPath
	}
	return dbPath
}
package impimport ("context""database/sql""fmt""io/fs""os""path/filepath""strings""time""skraak/db""skraak/utils")// ImportUnstructuredInput defines the input parameters for importing files into an unstructured datasettype ImportUnstructuredInput struct {DBPath string `json:"db_path"`DatasetID string `json:"dataset_id"`FolderPath string `json:"folder_path"`Recursive *bool `json:"recursive,omitempty"`}// ImportUnstructuredOutput defines the output structuretype ImportUnstructuredOutput struct {TotalFiles int `json:"total_files"`ImportedFiles int `json:"imported_files"`SkippedFiles int `json:"skipped_files"` // DuplicatesFailedFiles int `json:"failed_files"`TotalDuration float64 `json:"total_duration_seconds"`ProcessingTime string `json:"processing_time"`Errors []utils.FileImportError `json:"errors,omitempty"`}// ImportUnstructured imports WAV files into an unstructured dataset// Files are stored with minimal metadata: hash, duration, sample_rate, file_mod_time as timestamp// No location/cluster hierarchy, no astronomical data, no AudioMoth parsingfunc ImportUnstructured(ctx context.Context,input ImportUnstructuredInput,) (ImportUnstructuredOutput, error) {startTime := time.Now()var output ImportUnstructuredOutput// Default recursive to truerecursive := trueif input.Recursive != nil {recursive = *input.Recursive}// Validate inputif err := validateUnstructuredInput(input); err != nil {return output, fmt.Errorf("validation failed: %w", err)}// Scan for WAV files (no DB needed)files, scanErrors := scanWavFiles(input.FolderPath, recursive)output.Errors = append(output.Errors, scanErrors...)output.TotalFiles = len(files)if len(files) == 0 {output.ProcessingTime = time.Since(startTime).String()return output, nil}err := db.WithWriteTx(ctx, db.ResolveDBPath(input.DBPath, ""), "import_unstructured", func(database *sql.DB, tx *db.LoggedTx) error {// Process each filefor _, filePath := range files {fileResult, procErr := processUnstructuredFile(tx, filePath, input.DatasetID)if procErr != nil {output.FailedFiles++output.Errors = append(output.Errors, utils.FileImportError{FileName: filepath.Base(filePath),Error: procErr.Error(),Stage: utils.StageProcess,})continue}if fileResult.Skipped {output.SkippedFiles++} else {output.ImportedFiles++output.TotalDuration += fileResult.Duration}}return nil})if err != nil {return output, err}output.ProcessingTime = time.Since(startTime).String()return output, nil}// unstructuredFileResult holds the result of processing a single filetype unstructuredFileResult struct {Skipped bool // True if duplicateDuration float64 // Duration in seconds}// processUnstructuredFile processes a single WAV file for unstructured importfunc processUnstructuredFile(tx *db.LoggedTx, filePath, datasetID string) (*unstructuredFileResult, error) {result := &unstructuredFileResult{}// Step 1: Parse WAV headermetadata, err := utils.ParseWAVHeader(filePath)if err != nil {return nil, fmt.Errorf("WAV header parsing failed: %w", err)}// Step 2: Calculate hashhash, err := utils.ComputeXXH64(filePath)if err != nil {return nil, fmt.Errorf("hash calculation failed: %w", err)}// Step 3: Check for duplicate - if exists, skip entirely (do not link to dataset)_, isDuplicate, err := utils.CheckDuplicateHash(tx, hash)if err != nil {return nil, fmt.Errorf("duplicate check failed: %w", err)}if isDuplicate {// File already exists in database - skip completely, do not link to datasetresult.Skipped = trueresult.Duration = metadata.Durationreturn result, nil}// Step 4: Generate file IDfileID, err := 
utils.GenerateLongID()if err != nil {return nil, fmt.Errorf("ID generation failed: %w", err)}// Step 5: Use file modification time as timestamp (no timezone conversion)timestamp := metadata.FileModTime// Step 6: Insert into file table_, err = tx.Exec(`INSERT INTO file (id, file_name, xxh64_hash, location_id, cluster_id,timestamp_local, duration, sample_rate,maybe_solar_night, maybe_civil_night, moon_phase,active) VALUES (?, ?, ?, NULL, NULL, ?, ?, ?, NULL, NULL, NULL, TRUE)`,fileID,filepath.Base(filePath),hash,timestamp,metadata.Duration,metadata.SampleRate,)if err != nil {return nil, fmt.Errorf("file insert failed: %w", err)}// Step 7: Insert into file_dataset table_, err = tx.Exec("INSERT INTO file_dataset (file_id, dataset_id) VALUES (?, ?)",fileID, datasetID,)if err != nil {return nil, fmt.Errorf("file_dataset insert failed: %w", err)}result.Duration = metadata.Durationreturn result, nil}// validateUnstructuredInput validates the input parametersfunc validateUnstructuredInput(input ImportUnstructuredInput) error {// Validate dataset ID formatif err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {return err}// Verify folder existsinfo, err := os.Stat(input.FolderPath)if err != nil {return fmt.Errorf("folder not accessible: %w", err)}if !info.IsDir() {return fmt.Errorf("path is not a directory: %s", input.FolderPath)}return db.WithReadDB(db.ResolveDBPath(input.DBPath, ""), func(database *sql.DB) error {// Verify dataset exists and is activeif _, err := db.DatasetExistsAndActive(database, input.DatasetID); err != nil {return err}// Verify dataset is 'unstructured' typeif err := db.ValidateDatasetTypeUnstructured(database, input.DatasetID); err != nil {return err}return nil})}// scanWavFiles scans a folder for WAV filesfunc scanWavFiles(folderPath string, recursive bool) ([]string, []utils.FileImportError) {var files []stringvar errors []utils.FileImportErrorwalkFunc := func(path string, d fs.DirEntry, err error) error {if err != nil {errors = append(errors, utils.FileImportError{FileName: path,Error: err.Error(),Stage: utils.StageScan,})return nil}// Skip directories if not recursiveif d.IsDir() {if !recursive && path != folderPath {return fs.SkipDir}return nil}// Check for .wav extension (case-insensitive)if strings.HasSuffix(strings.ToLower(d.Name()), ".wav") {files = append(files, path)}return nil}if recursive {if err := filepath.WalkDir(folderPath, walkFunc); err != nil {errors = append(errors, utils.FileImportError{FileName: folderPath,Error: err.Error(),Stage: utils.StageScan,})}} else {// Non-recursive: only scan top-levelentries, err := os.ReadDir(folderPath)if err != nil {errors = append(errors, utils.FileImportError{FileName: folderPath,Error: err.Error(),Stage: utils.StageScan,})return nil, errors}for _, entry := range entries {if !entry.IsDir() && strings.HasSuffix(strings.ToLower(entry.Name()), ".wav") {files = append(files, filepath.Join(folderPath, entry.Name()))}}}return files, errors}
package imp

import (
	"testing"

	"skraak/utils"
)

func TestValidateSegmentImportInput(t *testing.T) {
	t.Run("invalid dataset ID - too short", func(t *testing.T) {
		input := ImportSegmentsInput{
			DatasetID: "abc",
		}
		err := validateSegmentImportInput(input)
		if err == nil {
			t.Fatal("expected error for short dataset ID")
		}
	})

	t.Run("invalid dataset ID - too long", func(t *testing.T) {
		input := ImportSegmentsInput{
			DatasetID: "abc123def456ghi789",
		}
		err := validateSegmentImportInput(input)
		if err == nil {
			t.Fatal("expected error for long dataset ID")
		}
	})

	t.Run("invalid dataset ID - invalid characters", func(t *testing.T) {
		input := ImportSegmentsInput{
			DatasetID: "abc123!!!456",
		}
		err := validateSegmentImportInput(input)
		if err == nil {
			t.Fatal("expected error for invalid characters in dataset ID")
		}
	})

	t.Run("invalid location ID", func(t *testing.T) {
		input := ImportSegmentsInput{
			DatasetID:  "abc123def456",
			LocationID: "invalid",
		}
		err := validateSegmentImportInput(input)
		if err == nil {
			t.Fatal("expected error for invalid location ID")
		}
	})

	t.Run("invalid cluster ID", func(t *testing.T) {
		input := ImportSegmentsInput{
			DatasetID:  "abc123def456",
			LocationID: "xyz789uvw012",
			ClusterID:  "invalid",
		}
		err := validateSegmentImportInput(input)
		if err == nil {
			t.Fatal("expected error for invalid cluster ID")
		}
	})
}

func TestCountTotalSegments(t *testing.T) {
	t.Run("empty", func(t *testing.T) {
		count := countTotalSegments(map[string]scannedDataFile{})
		if count != 0 {
			t.Errorf("expected 0, got %d", count)
		}
	})

	t.Run("single file - no segments", func(t *testing.T) {
		files := map[string]scannedDataFile{
			"file1": {Segments: []*utils.Segment{}},
		}
		count := countTotalSegments(files)
		if count != 0 {
			t.Errorf("expected 0, got %d", count)
		}
	})

	t.Run("single file - multiple segments", func(t *testing.T) {
		files := map[string]scannedDataFile{
			"file1": {Segments: []*utils.Segment{{}, {}, {}}},
		}
		count := countTotalSegments(files)
		if count != 3 {
			t.Errorf("expected 3, got %d", count)
		}
	})

	t.Run("multiple files", func(t *testing.T) {
		files := map[string]scannedDataFile{
			"file1": {Segments: []*utils.Segment{{}, {}}},
			"file2": {Segments: []*utils.Segment{{}}},
			"file3": {Segments: []*utils.Segment{{}, {}, {}, {}}},
		}
		count := countTotalSegments(files)
		if count != 7 {
			t.Errorf("expected 7, got %d", count)
		}
	})
}
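// A happy-path companion to the invalid-ID tests above: a minimal sketch that
// assumes 12-character alphanumeric IDs satisfy utils.ValidateShortID (as the
// IDs in the existing tests suggest) and uses a temp folder plus an empty
// mapping file so the os.Stat checks in validateSegmentImportInput pass.
package imp

import (
	"os"
	"path/filepath"
	"testing"
)

func TestValidateSegmentImportInput_ValidSketch(t *testing.T) {
	dir := t.TempDir()
	mapping := filepath.Join(dir, "mapping.json")
	if err := os.WriteFile(mapping, []byte("{}"), 0o644); err != nil {
		t.Fatal(err)
	}
	input := ImportSegmentsInput{
		Folder:     dir,
		Mapping:    mapping,
		DatasetID:  "abc123def456", // hypothetical but format-valid IDs
		LocationID: "xyz789uvw012",
		ClusterID:  "qrs345tuv678",
	}
	if err := validateSegmentImportInput(input); err != nil {
		t.Fatalf("expected valid input to pass validation, got: %v", err)
	}
}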
package imp

import (
	"context"
	"database/sql"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	"skraak/db"
	"skraak/utils"
)

// ImportSegmentsInput defines the input parameters for the import_segments tool
type ImportSegmentsInput struct {
	DBPath          string `json:"db_path"`
	Folder          string `json:"folder"`
	Mapping         string `json:"mapping"`
	DatasetID       string `json:"dataset_id"`
	LocationID      string `json:"location_id"`
	ClusterID       string `json:"cluster_id"`
	ProgressHandler func(processed, total int, message string)
}

// ImportSegmentsOutput defines the output structure for the import_segments tool
type ImportSegmentsOutput struct {
	Summary  ImportSegmentsSummary `json:"summary"`
	Segments []SegmentImport       `json:"segments"`
	Errors   []ImportSegmentError  `json:"errors,omitempty"`
}

// ImportSegmentsSummary provides summary statistics for the import operation
type ImportSegmentsSummary struct {
	DataFilesFound     int   `json:"data_files_found"`
	DataFilesProcessed int   `json:"data_files_processed"`
	TotalSegments      int   `json:"total_segments"`
	ImportedSegments   int   `json:"imported_segments"`
	ImportedLabels     int   `json:"imported_labels"`
	ImportedSubtypes   int   `json:"imported_subtypes"`
	ProcessingTimeMs   int64 `json:"processing_time_ms"`
}

// SegmentImport represents an imported segment in the output
type SegmentImport struct {
	SegmentID string        `json:"segment_id"`
	FileName  string        `json:"file_name"`
	StartTime float64       `json:"start_time"`
	EndTime   float64       `json:"end_time"`
	FreqLow   float64       `json:"freq_low"`
	FreqHigh  float64       `json:"freq_high"`
	Labels    []LabelImport `json:"labels"`
}

// LabelImport represents an imported label in the output
type LabelImport struct {
	LabelID   string `json:"label_id"`
	Species   string `json:"species"`
	CallType  string `json:"calltype,omitempty"`
	Filter    string `json:"filter"`
	Certainty int    `json:"certainty"`
	Comment   string `json:"comment,omitempty"`
}

// ImportSegmentError records errors encountered during segment import
type ImportSegmentError struct {
	File    string            `json:"file,omitempty"`
	Stage   utils.ImportStage `json:"stage"`
	Message string            `json:"message"`
}

// scannedDataFile holds parsed data for a .data file
type scannedDataFile struct {
	DataPath string
	WavPath  string
	WavHash  string
	FileID   string
	Duration float64
	Segments []*utils.Segment
}

// segmentValidation holds the results of pre-import validation (phases B+C).
type segmentValidation struct {
	scannedFiles  []scannedDataFile
	filterIDMap   map[string]string
	speciesIDMap  map[string]string
	calltypeIDMap map[string]map[string]string
	fileIDMap     map[string]scannedDataFile
}

// validateAndPrepareSegments performs phases B+C: parse data files, validate DB state, and prepare ID maps.
func validateAndPrepareSegments(
	database *sql.DB,
	input ImportSegmentsInput,
	mapping utils.MappingFile,
	dataFiles []string,
) (*segmentValidation, []ImportSegmentError, error) {
	// Phase B: Parse all .data files and collect unique values
	scannedFiles, parseErrors, uniqueFilters, uniqueSpecies, uniqueCalltypes := scanAllDataFiles(dataFiles, input.Folder)
	if len(scannedFiles) == 0 {
		return nil, parseErrors, nil
	}

	// Validate the dataset/location/cluster hierarchy
	if err := validateSegmentHierarchy(database, input.DatasetID, input.LocationID, input.ClusterID); err != nil {
		return nil, parseErrors, err
	}

	// Validate all filters exist
	filterIDMap, err := validateFiltersExist(database, uniqueFilters)
	if err != nil {
		return nil, parseErrors, fmt.Errorf("filter validation failed: %w", err)
	}

	// Validate the mapping covers all species/calltypes and that they exist in the DB
	validationResult, err := utils.ValidateMappingAgainstDB(database, mapping, uniqueSpecies, uniqueCalltypes)
	if err != nil {
		return nil, parseErrors, fmt.Errorf("mapping validation failed: %w", err)
	}
	if validationResult.HasErrors() {
		return nil, parseErrors, fmt.Errorf("mapping validation failed: %s", validationResult.Error())
	}

	// Load species and calltype ID maps
	speciesIDMap, calltypeIDMap, err := loadSpeciesCalltypeIDs(database, mapping, uniqueSpecies, uniqueCalltypes)
	if err != nil {
		return nil, parseErrors, fmt.Errorf("failed to load species/calltype IDs: %w", err)
	}

	// Validate files: hash exists, linked to dataset, no existing labels
	fileIDMap, hashErrors := validateAndMapFiles(database, scannedFiles, input.ClusterID, input.DatasetID)
	allErrors := append(parseErrors, hashErrors...)

	return &segmentValidation{
		scannedFiles:  scannedFiles,
		filterIDMap:   filterIDMap,
		speciesIDMap:  speciesIDMap,
		calltypeIDMap: calltypeIDMap,
		fileIDMap:     fileIDMap,
	}, allErrors, nil
}

// ImportSegments imports segments from AviaNZ .data files into the database
func ImportSegments(ctx context.Context, input ImportSegmentsInput) (ImportSegmentsOutput, error) {
	startTime := time.Now()
	var output ImportSegmentsOutput
	output.Segments = make([]SegmentImport, 0)
	output.Errors = make([]ImportSegmentError, 0)

	// Phase A: Input validation
	if err := validateSegmentImportInput(input); err != nil {
		return output, err
	}

	// Load the mapping file
	mapping, err := utils.LoadMappingFile(input.Mapping)
	if err != nil {
		return output, fmt.Errorf("failed to load mapping file: %w", err)
	}

	// Find .data files
	dataFiles, err := utils.FindDataFiles(input.Folder)
	if err != nil {
		return output, fmt.Errorf("failed to find .data files: %w", err)
	}
	output.Summary.DataFilesFound = len(dataFiles)
	if len(dataFiles) == 0 {
		return output, fmt.Errorf("no .data files found in folder: %s", input.Folder)
	}

	// Phases B+C: Parse data files and validate against the DB
	database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
	if err != nil {
		return output, fmt.Errorf("failed to open database: %w", err)
	}
	defer database.Close()

	val, valErrors, err := validateAndPrepareSegments(database, input, mapping, dataFiles)
	output.Errors = append(output.Errors, valErrors...)
	if err != nil {
		return output, err
	}
	if val == nil || len(val.fileIDMap) == 0 {
		output.Summary.ProcessingTimeMs = time.Since(startTime).Milliseconds()
		return output, nil
	}

	// Phase D: Transactional import
	importedSegments, importedLabels, importedSubtypes, fileUpdates, importErrors := importSegmentsIntoDB(
		ctx, database, val.fileIDMap, val.scannedFiles, mapping, val.filterIDMap, val.speciesIDMap, val.calltypeIDMap, input.DatasetID, input.ProgressHandler,
	)
	output.Errors = append(output.Errors, importErrors...)
	output.Segments = append(output.Segments, importedSegments...)

	// Phase E: Write IDs back to the .data files
	if len(fileUpdates) > 0 {
		writeErrors := writeIDsToDataFiles(fileUpdates)
		output.Errors = append(output.Errors, writeErrors...)
	}

	output.Summary.DataFilesProcessed = len(val.fileIDMap)
	output.Summary.TotalSegments = countTotalSegments(val.fileIDMap)
	output.Summary.ImportedSegments = len(importedSegments)
	output.Summary.ImportedLabels = importedLabels
	output.Summary.ImportedSubtypes = importedSubtypes
	output.Summary.ProcessingTimeMs = time.Since(startTime).Milliseconds()
	return output, nil
}

// validateSegmentImportInput validates input parameters
func validateSegmentImportInput(input ImportSegmentsInput) error {
	// Validate ID formats first (fast fail before filesystem checks); this also
	// lets the unit tests exercise ID validation without a real folder.
	if err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {
		return err
	}
	if err := utils.ValidateShortID(input.LocationID, "location_id"); err != nil {
		return err
	}
	if err := utils.ValidateShortID(input.ClusterID, "cluster_id"); err != nil {
		return err
	}
	// Validate the folder exists
	if info, err := os.Stat(input.Folder); err != nil {
		return fmt.Errorf("folder does not exist: %s", input.Folder)
	} else if !info.IsDir() {
		return fmt.Errorf("path is not a folder: %s", input.Folder)
	}
	// Validate the mapping file exists
	if _, err := os.Stat(input.Mapping); err != nil {
		return fmt.Errorf("mapping file does not exist: %s", input.Mapping)
	}
	return nil
}

// validateSegmentHierarchy validates dataset/location/cluster relationships
func validateSegmentHierarchy(dbConn *sql.DB, datasetID, locationID, clusterID string) error {
	// Validate the dataset exists and is structured
	if err := db.ValidateDatasetTypeForImport(dbConn, datasetID); err != nil {
		return err
	}
	// Validate the location belongs to the dataset
	if err := db.ValidateLocationBelongsToDataset(dbConn, locationID, datasetID); err != nil {
		return err
	}
	// Validate the cluster belongs to the location
	if err := db.ClusterBelongsToLocation(dbConn, clusterID, locationID); err != nil {
		return err
	}
	return nil
}

// scanAllDataFiles parses all .data files and collects unique values
func scanAllDataFiles(dataFiles []string, folder string) (
	[]scannedDataFile,
	[]ImportSegmentError,
	map[string]bool,
	map[string]bool,
	map[string]map[string]bool,
) {
	var scanned []scannedDataFile
	var errors []ImportSegmentError
	uniqueFilters := make(map[string]bool)
	uniqueSpecies := make(map[string]bool)
	uniqueCalltypes := make(map[string]map[string]bool) // species -> calltype -> true

	for _, dataPath := range dataFiles {
		// Find the corresponding WAV file
		wavPath := strings.TrimSuffix(dataPath, ".data")
		if _, err := os.Stat(wavPath); err != nil {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(dataPath),
				Stage:   utils.StageValidation,
				Message: fmt.Sprintf("corresponding WAV file not found: %s", filepath.Base(wavPath)),
			})
			continue
		}

		// Parse the .data file
		df, err := utils.ParseDataFile(dataPath)
		if err != nil {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(dataPath),
				Stage:   utils.StageValidation,
				Message: fmt.Sprintf("failed to parse .data file: %v", err),
			})
			continue
		}

		// Collect unique filters, species, and calltypes
		for _, seg := range df.Segments {
			for _, label := range seg.Labels {
				uniqueFilters[label.Filter] = true
				uniqueSpecies[label.Species] = true
				if label.CallType != "" {
					if uniqueCalltypes[label.Species] == nil {
						uniqueCalltypes[label.Species] = make(map[string]bool)
					}
					uniqueCalltypes[label.Species][label.CallType] = true
				}
			}
		}

		scanned = append(scanned, scannedDataFile{
			DataPath: dataPath,
			WavPath:  wavPath,
			Duration: df.Meta.Duration,
			Segments: df.Segments,
		})
	}

	return scanned, errors, uniqueFilters, uniqueSpecies, uniqueCalltypes
}

// validateFiltersExist checks all filters exist in the DB and returns an ID map
func validateFiltersExist(dbConn *sql.DB, filterNames map[string]bool) (map[string]string, error) {
	filterIDMap := make(map[string]string)
	if len(filterNames) == 0 {
		return filterIDMap, nil
	}

	names := make([]string, 0, len(filterNames))
	for name := range filterNames {
		names = append(names, name)
	}
	query := `SELECT id, name FROM filter WHERE name IN (` + db.Placeholders(len(names)) + `) AND active = true`
	args := make([]any, len(names))
	for i, name := range names {
		args[i] = name
	}
	rows, err := dbConn.Query(query, args...)
	if err != nil {
		return nil, fmt.Errorf("failed to query filters: %w", err)
	}
	defer rows.Close()
	for rows.Next() {
		var id, name string
		if err := rows.Scan(&id, &name); err == nil {
			filterIDMap[name] = id
		}
	}

	// Check for missing filters
	var missing []string
	for name := range filterNames {
		if _, exists := filterIDMap[name]; !exists {
			missing = append(missing, name)
		}
	}
	if len(missing) > 0 {
		return nil, fmt.Errorf("filters not found in database: [%s]", strings.Join(missing, ", "))
	}
	return filterIDMap, nil
}

// loadSpeciesCalltypeIDs loads species and calltype ID maps
func loadSpeciesCalltypeIDs(
	dbConn *sql.DB,
	mapping utils.MappingFile,
	uniqueSpecies map[string]bool,
	uniqueCalltypes map[string]map[string]bool,
) (map[string]string, map[string]map[string]string, error) {
	speciesIDMap := make(map[string]string)
	calltypeIDMap := make(map[string]map[string]string) // dbSpecies -> dbCalltype -> calltype_id

	// Collect all DB species labels from the mapping
	dbSpeciesSet := make(map[string]bool)
	for dataSpecies := range uniqueSpecies {
		if dbSpecies, ok := mapping.GetDBSpecies(dataSpecies); ok {
			dbSpeciesSet[dbSpecies] = true
		}
	}

	// Load species IDs
	if len(dbSpeciesSet) > 0 {
		dbSpeciesList := make([]string, 0, len(dbSpeciesSet))
		for s := range dbSpeciesSet {
			dbSpeciesList = append(dbSpeciesList, s)
		}
		query := `SELECT id, label FROM species WHERE label IN (` + db.Placeholders(len(dbSpeciesList)) + `) AND active = true`
		args := make([]any, len(dbSpeciesList))
		for i, s := range dbSpeciesList {
			args[i] = s
		}
		rows, err := dbConn.Query(query, args...)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to query species: %w", err)
		}
		defer rows.Close()
		for rows.Next() {
			var id, label string
			if err := rows.Scan(&id, &label); err == nil {
				speciesIDMap[label] = id
			}
		}
	}

	// Load calltype IDs
	for dataSpecies, ctSet := range uniqueCalltypes {
		dbSpecies, ok := mapping.GetDBSpecies(dataSpecies)
		if !ok {
			continue
		}
		if calltypeIDMap[dbSpecies] == nil {
			calltypeIDMap[dbSpecies] = make(map[string]string)
		}
		for dataCalltype := range ctSet {
			dbCalltype := mapping.GetDBCalltype(dataSpecies, dataCalltype)
			// Query the calltype ID
			var calltypeID string
			err := dbConn.QueryRow(`
				SELECT ct.id
				FROM call_type ct
				JOIN species s ON ct.species_id = s.id
				WHERE s.label = ? AND ct.label = ? AND ct.active = true`, dbSpecies, dbCalltype).Scan(&calltypeID)
			if err == nil {
				calltypeIDMap[dbSpecies][dbCalltype] = calltypeID
			}
		}
	}

	return speciesIDMap, calltypeIDMap, nil
}

// validateAndMapFiles validates that files exist by hash, are linked to the dataset, and have no existing labels
func validateAndMapFiles(
	dbConn *sql.DB,
	scannedFiles []scannedDataFile,
	clusterID string,
	datasetID string,
) (map[string]scannedDataFile, []ImportSegmentError) {
	fileIDMap := make(map[string]scannedDataFile)
	var errors []ImportSegmentError

	for _, sf := range scannedFiles {
		// Compute the hash
		hash, err := utils.ComputeXXH64(sf.WavPath)
		if err != nil {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(sf.WavPath),
				Stage:   utils.StageHash,
				Message: fmt.Sprintf("failed to compute hash: %v", err),
			})
			continue
		}
		sf.WavHash = hash

		// Find the file by hash within the cluster
		var fileID string
		var duration float64
		err = dbConn.QueryRow(`
			SELECT id, duration FROM file
			WHERE xxh64_hash = ? AND cluster_id = ? AND active = true`, hash, clusterID).Scan(&fileID, &duration)
		if err == sql.ErrNoRows {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(sf.WavPath),
				Stage:   utils.StageValidation,
				Message: fmt.Sprintf("file hash not found in database for cluster (hash: %s)", hash),
			})
			continue
		}
		if err != nil {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(sf.WavPath),
				Stage:   utils.StageValidation,
				Message: fmt.Sprintf("failed to query file: %v", err),
			})
			continue
		}
		sf.FileID = fileID
		sf.Duration = duration

		// Verify the file is linked to the dataset via the file_dataset junction table (composite FK)
		var fileLinkedToDataset bool
		err = dbConn.QueryRow(`
			SELECT EXISTS(SELECT 1 FROM file_dataset WHERE file_id = ? AND dataset_id = ?)`, fileID, datasetID).Scan(&fileLinkedToDataset)
		if err != nil {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(sf.WavPath),
				Stage:   utils.StageValidation,
				Message: fmt.Sprintf("failed to verify file-dataset link: %v", err),
			})
			continue
		}
		if !fileLinkedToDataset {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(sf.WavPath),
				Stage:   utils.StageValidation,
				Message: fmt.Sprintf("file exists in cluster but is not linked to dataset %s", datasetID),
			})
			continue
		}

		// Check there are no existing labels for this file
		var labelCount int
		err = dbConn.QueryRow(`
			SELECT COUNT(*) FROM label l
			JOIN segment s ON l.segment_id = s.id
			WHERE s.file_id = ? AND l.active = true`, fileID).Scan(&labelCount)
		if err != nil {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(sf.WavPath),
				Stage:   utils.StageValidation,
				Message: fmt.Sprintf("failed to check existing labels: %v", err),
			})
			continue
		}
		if labelCount > 0 {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(sf.WavPath),
				Stage:   utils.StageValidation,
				Message: fmt.Sprintf("file already has %d label(s) - fresh imports only", labelCount),
			})
			continue
		}

		fileIDMap[fileID] = sf
	}

	return fileIDMap, errors
}

// dataFileUpdate holds data to write back to a .data file after import
type dataFileUpdate struct {
	DataPath string
	WavHash  string
	LabelIDs map[int]map[int]string // segmentIndex -> labelIndex -> labelID
}

// importLabelResult holds the result of importing a single label.
type importLabelResult struct {
	labelImport      LabelImport
	labelID          string
	subtypesImported int
	err              ImportSegmentError
	hasError         bool
}

// importSingleLabel inserts a single label and its metadata/subtype into the DB.
func importSingleLabel(
	ctx context.Context,
	tx *db.LoggedTx,
	label *utils.Label,
	segmentID string,
	segIdx, labelIdx int,
	sf scannedDataFile,
	mapping utils.MappingFile,
	filterIDMap map[string]string,
	speciesIDMap map[string]string,
	calltypeIDMap map[string]map[string]string,
) importLabelResult {
	dbSpecies, ok := mapping.GetDBSpecies(label.Species)
	if !ok {
		return importLabelResult{err: ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("species not found in mapping: %s", label.Species),
		}, hasError: true}
	}
	speciesID, ok := speciesIDMap[dbSpecies]
	if !ok {
		return importLabelResult{err: ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("species ID not found: %s", dbSpecies),
		}, hasError: true}
	}
	filterID, ok := filterIDMap[label.Filter]
	if !ok {
		return importLabelResult{err: ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("filter ID not found: %s", label.Filter),
		}, hasError: true}
	}

	labelID, err := utils.GenerateLongID()
	if err != nil {
		return importLabelResult{err: ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("failed to generate label ID: %v", err),
		}, hasError: true}
	}

	_, err = tx.ExecContext(ctx, `
		INSERT INTO label (id, segment_id, species_id, filter_id, certainty, created_at, last_modified, active)
		VALUES (?, ?, ?, ?, ?, now(), now(), true)`, labelID, segmentID, speciesID, filterID, label.Certainty)
	if err != nil {
		return importLabelResult{err: ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("failed to insert label: %v", err),
		}, hasError: true}
	}

	// Insert label_metadata if a comment exists. Marshal with encoding/json so
	// quotes, backslashes, and control characters are all escaped correctly.
	if label.Comment != "" {
		metadataJSON, mErr := json.Marshal(map[string]string{"comment": label.Comment})
		if mErr != nil {
			return importLabelResult{err: ImportSegmentError{
				File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
				Message: fmt.Sprintf("failed to encode comment: %v", mErr),
			}, hasError: true}
		}
		if _, err := tx.ExecContext(ctx, `
			INSERT INTO label_metadata (label_id, json, created_at, last_modified, active)
			VALUES (?, ?, now(), now(), true)`, labelID, string(metadataJSON)); err != nil {
			return importLabelResult{err: ImportSegmentError{
				File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
				Message: fmt.Sprintf("failed to insert label_metadata: %v", err),
			}, hasError: true}
		}
	}

	labelImport := LabelImport{
		LabelID:   labelID,
		Species:   dbSpecies,
		Filter:    label.Filter,
		Certainty: label.Certainty,
	}
	if label.Comment != "" {
		labelImport.Comment = label.Comment
	}

	// Insert label_subtype if a calltype exists
	if label.CallType != "" {
		if err := importCalltype(ctx, tx, labelID, label, dbSpecies, filterID, mapping, calltypeIDMap, sf); err != nil {
			return importLabelResult{err: *err, hasError: true}
		}
		labelImport.CallType = mapping.GetDBCalltype(label.Species, label.CallType)
		return importLabelResult{labelImport: labelImport, labelID: labelID, subtypesImported: 1}
	}

	return importLabelResult{labelImport: labelImport, labelID: labelID}
}

// importCalltype inserts a label_subtype row for a calltype label.
func importCalltype(
	ctx context.Context,
	tx *db.LoggedTx,
	labelID string,
	label *utils.Label,
	dbSpecies string,
	filterID string,
	mapping utils.MappingFile,
	calltypeIDMap map[string]map[string]string,
	sf scannedDataFile,
) *ImportSegmentError {
	dbCalltype := mapping.GetDBCalltype(label.Species, label.CallType)
	calltypeID := ""
	if calltypeIDMap[dbSpecies] != nil {
		calltypeID = calltypeIDMap[dbSpecies][dbCalltype]
	}
	if calltypeID == "" {
		return &ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("calltype ID not found: %s/%s", dbSpecies, dbCalltype),
		}
	}

	subtypeID, err := utils.GenerateLongID()
	if err != nil {
		return &ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("failed to generate label_subtype ID: %v", err),
		}
	}

	_, err = tx.ExecContext(ctx, `
		INSERT INTO label_subtype (id, label_id, calltype_id, filter_id, certainty, created_at, last_modified, active)
		VALUES (?, ?, ?, ?, ?, now(), now(), true)`, subtypeID, labelID, calltypeID, filterID, label.Certainty)
	if err != nil {
		return &ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("failed to insert label_subtype: %v", err),
		}
	}
	return nil
}

// importSegmentsIntoDB performs the transactional import
func importSegmentsIntoDB(
	ctx context.Context,
	database *sql.DB,
	fileIDMap map[string]scannedDataFile,
	scannedFiles []scannedDataFile,
	mapping utils.MappingFile,
	filterIDMap map[string]string,
	speciesIDMap map[string]string,
	calltypeIDMap map[string]map[string]string,
	datasetID string,
	progressHandler func(processed, total int, message string),
) ([]SegmentImport, int, int, []dataFileUpdate, []ImportSegmentError) {
	var importedSegments []SegmentImport
	var errors []ImportSegmentError
	importedLabels := 0
	importedSubtypes := 0
	var fileUpdates []dataFileUpdate

	tx, err := db.BeginLoggedTx(ctx, database, "import_segments")
	if err != nil {
		errors = append(errors, ImportSegmentError{
			Stage:   utils.StageImport,
			Message: fmt.Sprintf("failed to begin transaction: %v", err),
		})
		return nil, 0, 0, nil, errors
	}
	defer tx.Rollback()

	totalFiles := len(fileIDMap)
	processedFiles := 0
	for _, sf := range fileIDMap {
		if sf.FileID == "" {
			continue
		}
		processedFiles++
		if progressHandler != nil {
			progressHandler(processedFiles, totalFiles, filepath.Base(sf.DataPath))
		}

		fileUpdate := dataFileUpdate{
			DataPath: sf.DataPath,
			WavHash:  sf.WavHash,
			LabelIDs: make(map[int]map[int]string),
		}

		for segIdx, seg := range sf.Segments {
			segImp, labelIDs, subtypes, segErrs := importSegment(ctx, tx, seg, segIdx, sf, datasetID, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
			errors = append(errors, segErrs...)
			importedSubtypes += subtypes
			if len(segImp.Labels) == 0 {
				// Delete the orphaned segment (no labels succeeded)
				if _, err := tx.ExecContext(ctx, `DELETE FROM segment WHERE id = ?`, segImp.SegmentID); err != nil {
					errors = append(errors, ImportSegmentError{
						File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
						Message: fmt.Sprintf("failed to delete orphaned segment: %v", err),
					})
				}
			} else {
				importedSegments = append(importedSegments, segImp)
				importedLabels += len(labelIDs)
				fileUpdate.LabelIDs[segIdx] = labelIDs
			}
		}

		fileUpdates = append(fileUpdates, fileUpdate)
	}

	if err := tx.Commit(); err != nil {
		errors = append(errors, ImportSegmentError{
			Stage:   utils.StageImport,
			Message: fmt.Sprintf("failed to commit transaction: %v", err),
		})
		return nil, 0, 0, nil, errors
	}

	return importedSegments, importedLabels, importedSubtypes, fileUpdates, errors
}

// importSegment inserts a single segment and its labels into the DB.
func importSegment(
	ctx context.Context,
	tx *db.LoggedTx,
	seg *utils.Segment,
	segIdx int,
	sf scannedDataFile,
	datasetID string,
	mapping utils.MappingFile,
	filterIDMap map[string]string,
	speciesIDMap map[string]string,
	calltypeIDMap map[string]map[string]string,
) (SegmentImport, map[int]string, int, []ImportSegmentError) {
	var errors []ImportSegmentError

	if seg.StartTime >= seg.EndTime {
		errors = append(errors, ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("invalid segment bounds: start=%.2f >= end=%.2f", seg.StartTime, seg.EndTime),
		})
		return SegmentImport{}, nil, 0, errors
	}
	if seg.EndTime > sf.Duration {
		errors = append(errors, ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("segment end time (%.2f) exceeds file duration (%.2f)", seg.EndTime, sf.Duration),
		})
		return SegmentImport{}, nil, 0, errors
	}

	segmentID, err := utils.GenerateLongID()
	if err != nil {
		errors = append(errors, ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("failed to generate segment ID: %v", err),
		})
		return SegmentImport{}, nil, 0, errors
	}

	_, err = tx.ExecContext(ctx, `
		INSERT INTO segment (id, file_id, dataset_id, start_time, end_time, freq_low, freq_high, created_at, last_modified, active)
		VALUES (?, ?, ?, ?, ?, ?, ?, now(), now(), true)`, segmentID, sf.FileID, datasetID, seg.StartTime, seg.EndTime, seg.FreqLow, seg.FreqHigh)
	if err != nil {
		errors = append(errors, ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: fmt.Sprintf("failed to insert segment: %v", err),
		})
		return SegmentImport{}, nil, 0, errors
	}

	segImport := SegmentImport{
		SegmentID: segmentID,
		FileName:  filepath.Base(sf.WavPath),
		StartTime: seg.StartTime,
		EndTime:   seg.EndTime,
		FreqLow:   seg.FreqLow,
		FreqHigh:  seg.FreqHigh,
		Labels:    make([]LabelImport, 0),
	}

	labelIDs := make(map[int]string)
	var subtypesImported int
	for labelIdx, label := range seg.Labels {
		result := importSingleLabel(ctx, tx, label, segmentID, segIdx, labelIdx, sf, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
		if result.hasError {
			errors = append(errors, result.err)
			continue
		}
		labelIDs[labelIdx] = result.labelID
		segImport.Labels = append(segImport.Labels, result.labelImport)
		subtypesImported += result.subtypesImported
	}

	return segImport, labelIDs, subtypesImported, errors
}

// countTotalSegments counts total segments across the validated files
func countTotalSegments(fileIDMap map[string]scannedDataFile) int {
	count := 0
	for _, sf := range fileIDMap {
		count += len(sf.Segments)
	}
	return count
}

// writeIDsToDataFiles writes skraak_hash and skraak_label_ids back to the .data files
func writeIDsToDataFiles(fileUpdates []dataFileUpdate) []ImportSegmentError {
	var errors []ImportSegmentError
	for _, fu := range fileUpdates {
		// Parse the .data file
		df, err := utils.ParseDataFile(fu.DataPath)
		if err != nil {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(fu.DataPath),
				Stage:   utils.StageImport,
				Message: fmt.Sprintf("failed to re-parse .data file for writing: %v", err),
			})
			continue
		}

		// Write skraak_hash to the metadata
		if df.Meta.Extra == nil {
			df.Meta.Extra = make(map[string]any)
		}
		df.Meta.Extra["skraak_hash"] = fu.WavHash

		// Write skraak_label_id to each label
		for segIdx, labelIDs := range fu.LabelIDs {
			if segIdx >= len(df.Segments) {
				continue
			}
			seg := df.Segments[segIdx]
			for labelIdx, labelID := range labelIDs {
				if labelIdx >= len(seg.Labels) {
					continue
				}
				label := seg.Labels[labelIdx]
				if label.Extra == nil {
					label.Extra = make(map[string]any)
				}
				label.Extra["skraak_label_id"] = labelID
			}
		}

		// Write the updated .data file
		if err := df.Write(fu.DataPath); err != nil {
			errors = append(errors, ImportSegmentError{
				File:    filepath.Base(fu.DataPath),
				Stage:   utils.StageImport,
				Message: fmt.Sprintf("failed to write updated .data file: %v", err),
			})
			continue
		}
	}
	return errors
}
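// Usage sketch for ImportSegments (compiled, not run: no Output comment). The
// import path "skraak/imp" and all paths/IDs are assumptions. It shows the
// ProgressHandler callback shape; on success, skraak_hash and skraak_label_id
// are written back into the .data files by phase E.
package imp_test

import (
	"context"
	"fmt"

	"skraak/imp"
)

func ExampleImportSegments() {
	out, err := imp.ImportSegments(context.Background(), imp.ImportSegmentsInput{
		DBPath:     "skraak.db",    // hypothetical
		Folder:     "/data/avianz", // hypothetical folder of .data files
		Mapping:    "mapping.json", // hypothetical species/calltype mapping
		DatasetID:  "abc123def456",
		LocationID: "xyz789uvw012",
		ClusterID:  "qrs345tuv678",
		ProgressHandler: func(processed, total int, message string) {
			fmt.Printf("[%d/%d] %s\n", processed, total, message)
		},
	})
	if err != nil {
		fmt.Println("import failed:", err)
		return
	}
	fmt.Printf("%d segments, %d labels imported\n",
		out.Summary.ImportedSegments, out.Summary.ImportedLabels)
}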
package imp

import (
	"context"
	"database/sql"
	"fmt"
	"os"
	"time"

	"skraak/db"
	"skraak/utils"
)

// ImportAudioFilesInput defines the input parameters for the import_audio_files tool
type ImportAudioFilesInput struct {
	DBPath     string `json:"db_path"`
	FolderPath string `json:"folder_path"`
	DatasetID  string `json:"dataset_id"`
	LocationID string `json:"location_id"`
	ClusterID  string `json:"cluster_id"`
	Recursive  *bool  `json:"recursive,omitempty"` // *bool because the default is true; a plain bool would make "not provided" indistinguishable from "false"
}

// ImportAudioFilesOutput defines the output structure for the import_audio_files tool
type ImportAudioFilesOutput struct {
	Summary ImportSummary           `json:"summary"`
	FileIDs []string                `json:"file_ids"`
	Errors  []utils.FileImportError `json:"errors,omitempty"`
}

// ImportSummary provides summary statistics for the import operation
type ImportSummary struct {
	TotalFiles     int     `json:"total_files"`
	ImportedFiles  int     `json:"imported_files"`
	SkippedFiles   int     `json:"skipped_files"` // Duplicates
	FailedFiles    int     `json:"failed_files"`
	AudioMothFiles int     `json:"audiomoth_files"`
	TotalDuration  float64 `json:"total_duration_seconds"`
	ProcessingTime string  `json:"processing_time"`
}

// ImportAudioFiles batch imports WAV files from a folder with hash-based duplicate detection
func ImportAudioFiles(ctx context.Context, input ImportAudioFilesInput) (ImportAudioFilesOutput, error) {
	startTime := time.Now()
	var output ImportAudioFilesOutput

	// Default recursive to true
	recursive := true
	if input.Recursive != nil {
		recursive = *input.Recursive
	}

	// Validate the folder and the database hierarchy (dataset → location → cluster)
	if err := validateImportInput(input, db.ResolveDBPath(input.DBPath, "")); err != nil {
		return output, fmt.Errorf("validation failed: %w", err)
	}

	// Open the database
	database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
	if err != nil {
		return output, fmt.Errorf("failed to open database: %w", err)
	}
	defer database.Close()

	// Set the cluster path if empty
	err = utils.EnsureClusterPath(database, input.ClusterID, input.FolderPath)
	if err != nil {
		return output, fmt.Errorf("failed to set cluster path: %w", err)
	}

	// Import the cluster (the core logic lives in utils.ImportCluster)
	tx, err := db.BeginLoggedTx(ctx, database, "import_audio_files")
	if err != nil {
		return output, fmt.Errorf("failed to begin transaction: %w", err)
	}
	clusterOutput, err := utils.ImportCluster(database, tx.UnderlyingTx(), utils.ClusterImportInput{
		FolderPath: input.FolderPath,
		DatasetID:  input.DatasetID,
		LocationID: input.LocationID,
		ClusterID:  input.ClusterID,
		Recursive:  recursive,
	})
	if err != nil {
		tx.Rollback()
		return output, fmt.Errorf("cluster import failed: %w", err)
	}
	if err := tx.Commit(); err != nil {
		return output, fmt.Errorf("transaction commit failed: %w", err)
	}

	// Map to the output format
	output = ImportAudioFilesOutput{
		Summary: ImportSummary{
			TotalFiles:     clusterOutput.TotalFiles,
			ImportedFiles:  clusterOutput.ImportedFiles,
			SkippedFiles:   clusterOutput.SkippedFiles,
			FailedFiles:    clusterOutput.FailedFiles,
			AudioMothFiles: clusterOutput.AudioMothFiles,
			TotalDuration:  clusterOutput.TotalDuration,
			ProcessingTime: time.Since(startTime).String(),
		},
		FileIDs: []string{}, // File IDs are not tracked currently
		Errors:  clusterOutput.Errors,
	}
	return output, nil
}

// validateImportInput validates all input parameters and database relationships
func validateImportInput(input ImportAudioFilesInput, dbPath string) error {
	// Verify the folder exists
	info, err := os.Stat(input.FolderPath)
	if err != nil {
		return fmt.Errorf("folder not accessible: %w", err)
	}
	if !info.IsDir() {
		return fmt.Errorf("path is not a directory: %s", input.FolderPath)
	}
	return validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, dbPath)
}

// validateHierarchyIDs validates dataset/location/cluster ID formats and database relationships
func validateHierarchyIDs(datasetID, locationID, clusterID, dbPath string) error {
	// Validate ID formats first (fast fail before DB queries)
	if err := utils.ValidateShortID(datasetID, "dataset_id"); err != nil {
		return err
	}
	if err := utils.ValidateShortID(locationID, "location_id"); err != nil {
		return err
	}
	if err := utils.ValidateShortID(clusterID, "cluster_id"); err != nil {
		return err
	}

	return db.WithReadDB(dbPath, func(database *sql.DB) error {
		// Verify the dataset exists, is active, and is 'structured' type
		if err := db.ValidateDatasetTypeForImport(database, datasetID); err != nil {
			return err
		}
		// Verify the location exists and belongs to the dataset
		if err := db.ValidateLocationBelongsToDataset(database, locationID, datasetID); err != nil {
			return err
		}
		// Verify the cluster exists and belongs to the location
		if err := db.ClusterBelongsToLocation(database, clusterID, locationID); err != nil {
			return err
		}
		return nil
	})
}
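// Usage sketch for ImportAudioFiles showing how to override the recursive
// default (true) via the *bool field. The import path "skraak/imp" and the
// values are assumptions; compiled, not run.
package imp_test

import (
	"context"
	"fmt"

	"skraak/imp"
)

func ExampleImportAudioFiles() {
	recursive := false // override the default of true
	out, err := imp.ImportAudioFiles(context.Background(), imp.ImportAudioFilesInput{
		DBPath:     "skraak.db",       // hypothetical
		FolderPath: "/data/cluster01", // hypothetical
		DatasetID:  "abc123def456",
		LocationID: "xyz789uvw012",
		ClusterID:  "qrs345tuv678",
		Recursive:  &recursive,
	})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("imported %d files (%d AudioMoth)\n",
		out.Summary.ImportedFiles, out.Summary.AudioMothFiles)
}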
package imp

import (
	"context"
	"database/sql"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	"skraak/db"
	"skraak/utils"
)

// ImportFileInput defines the input parameters for the import_file tool
type ImportFileInput struct {
	DBPath     string `json:"db_path"`
	FilePath   string `json:"file_path"`
	DatasetID  string `json:"dataset_id"`
	LocationID string `json:"location_id"`
	ClusterID  string `json:"cluster_id"`
}

// ImportFileOutput defines the output structure for the import_file tool
type ImportFileOutput struct {
	FileID         string    `json:"file_id"`
	FileName       string    `json:"file_name"`
	Hash           string    `json:"hash"`
	Duration       float64   `json:"duration_seconds"`
	SampleRate     int       `json:"sample_rate"`
	TimestampLocal time.Time `json:"timestamp_local"`
	IsAudioMoth    bool      `json:"is_audiomoth"`
	IsDuplicate    bool      `json:"is_duplicate"`
	ProcessingTime string    `json:"processing_time"`
	Error          *string   `json:"error,omitempty"`
}

// ImportFile imports a single WAV file into the database with duplicate detection
func ImportFile(ctx context.Context, input ImportFileInput) (ImportFileOutput, error) {
	startTime := time.Now()
	var output ImportFileOutput

	// Phase 1: Validate the file path
	_, err := validateFilePath(input.FilePath)
	if err != nil {
		return output, fmt.Errorf("file validation failed: %w", err)
	}
	output.FileName = filepath.Base(input.FilePath)

	// Phase 2: Validate the database hierarchy
	if err := validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, db.ResolveDBPath(input.DBPath, "")); err != nil {
		return output, fmt.Errorf("hierarchy validation failed: %w", err)
	}

	// Phase 3: Open a database connection (a single connection for all DB operations)
	database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
	if err != nil {
		return output, fmt.Errorf("database connection failed: %w", err)
	}
	defer database.Close()

	// Phase 4: Get location data for astronomical calculations
	locData, err := utils.GetLocationData(database, input.LocationID)
	if err != nil {
		return output, fmt.Errorf("failed to get location data: %w", err)
	}

	// Phase 5: Process file metadata
	result, err := utils.ProcessSingleFile(input.FilePath, locData.Latitude, locData.Longitude, locData.TimezoneID, true)
	if err != nil {
		errMsg := err.Error()
		output.Error = &errMsg
		output.ProcessingTime = time.Since(startTime).String()
		return output, fmt.Errorf("file processing failed: %w", err)
	}

	// Populate the output with the extracted metadata
	output.FileName = result.FileName
	output.Hash = result.Hash
	output.Duration = result.Duration
	output.SampleRate = result.SampleRate
	output.TimestampLocal = result.TimestampLocal
	output.IsAudioMoth = result.IsAudioMoth

	// Phase 6: Ensure the cluster path is set
	if err := utils.EnsureClusterPath(database, input.ClusterID, filepath.Dir(input.FilePath)); err != nil {
		return output, fmt.Errorf("failed to set cluster path: %w", err)
	}

	// Phase 7: Insert into the database
	fileID, isDuplicate, err := insertFileIntoDB(ctx, database, result, input.DatasetID, input.ClusterID, input.LocationID)
	if err != nil {
		errMsg := err.Error()
		output.Error = &errMsg
		output.ProcessingTime = time.Since(startTime).String()
		return output, fmt.Errorf("database insertion failed: %w", err)
	}

	output.FileID = fileID
	output.IsDuplicate = isDuplicate
	output.ProcessingTime = time.Since(startTime).String()
	return output, nil
}

// validateFilePath validates that the file exists, is a regular file, is a WAV file, and is not empty
func validateFilePath(filePath string) (os.FileInfo, error) {
	// Check the file exists
	info, err := os.Stat(filePath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, fmt.Errorf("file does not exist: %s", filePath)
		}
		return nil, fmt.Errorf("cannot access file: %w", err)
	}
	// Check it's a regular file
	if !info.Mode().IsRegular() {
		return nil, fmt.Errorf("path is not a regular file: %s", filePath)
	}
	// Check the extension is .wav (case-insensitive)
	ext := strings.ToLower(filepath.Ext(filePath))
	if ext != ".wav" {
		return nil, fmt.Errorf("file must be a WAV file (got extension: %s)", ext)
	}
	// Check the file is not empty
	if info.Size() == 0 {
		return nil, fmt.Errorf("file is empty: %s", filePath)
	}
	return info, nil
}

// insertFileIntoDB inserts a single file into the database.
// Returns (fileID, isDuplicate, error).
func insertFileIntoDB(
	ctx context.Context,
	database *sql.DB,
	result *utils.FileProcessingResult,
	datasetID, clusterID, locationID string,
) (string, bool, error) {
	// Begin a logged transaction
	tx, err := db.BeginLoggedTx(ctx, database, "import_audio_file")
	if err != nil {
		return "", false, fmt.Errorf("failed to begin transaction: %w", err)
	}
	defer tx.Rollback() // Rollback if not committed

	// Check for a duplicate hash
	existingID, isDup, err := utils.CheckDuplicateHash(tx, result.Hash)
	if err != nil {
		return "", false, err
	}
	if isDup {
		return existingID, true, nil
	}

	// Generate the file ID
	fileID, err := utils.GenerateLongID()
	if err != nil {
		return "", false, fmt.Errorf("ID generation failed: %w", err)
	}

	// Insert the file record
	_, err = tx.ExecContext(ctx, `
		INSERT INTO file (id, file_name, xxh64_hash, location_id, timestamp_local,
			cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,
			moon_phase, created_at, last_modified, active)
		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)`,
		fileID, result.FileName, result.Hash, locationID,
		result.TimestampLocal, clusterID, result.Duration, result.SampleRate,
		result.AstroData.SolarNight, result.AstroData.CivilNight, result.AstroData.MoonPhase,
	)
	if err != nil {
		return "", false, fmt.Errorf("file insert failed: %w", err)
	}

	// Insert the file_dataset junction row
	_, err = tx.ExecContext(ctx, `
		INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)
		VALUES (?, ?, now(), now())`, fileID, datasetID)
	if err != nil {
		return "", false, fmt.Errorf("file_dataset insert failed: %w", err)
	}

	// If AudioMoth, insert moth_metadata
	if result.IsAudioMoth && result.MothData != nil {
		_, err = tx.ExecContext(ctx, `
			INSERT INTO moth_metadata (file_id, timestamp, recorder_id, gain, battery_v, temp_c,
				created_at, last_modified, active)
			VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)`,
			fileID,
			result.MothData.Timestamp,
			&result.MothData.RecorderID,
			&result.MothData.Gain,
			&result.MothData.BatteryV,
			&result.MothData.TempC,
		)
		if err != nil {
			return "", false, fmt.Errorf("moth_metadata insert failed: %w", err)
		}
	}

	// Commit the transaction
	if err = tx.Commit(); err != nil {
		return "", false, fmt.Errorf("transaction commit failed: %w", err)
	}
	return fileID, false, nil
}
package imp

import (
	"context"
	"database/sql"
	"encoding/csv"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"skraak/db"
	"skraak/utils"
)

// BulkFileImportInput defines the input parameters for the bulk_file_import tool
type BulkFileImportInput struct {
	DBPath      string `json:"db_path"`
	DatasetID   string `json:"dataset_id"`
	CSVPath     string `json:"csv_path"`
	LogFilePath string `json:"log_file_path"`
}

// BulkFileImportOutput defines the output structure for the bulk_file_import tool
type BulkFileImportOutput struct {
	TotalLocations    int      `json:"total_locations"`
	ClustersCreated   int      `json:"clusters_created"`
	ClustersExisting  int      `json:"clusters_existing"`
	TotalFilesScanned int      `json:"total_files_scanned"`
	FilesImported     int      `json:"files_imported"`
	FilesDuplicate    int      `json:"files_duplicate"`
	FilesError        int      `json:"files_error"`
	ProcessingTime    string   `json:"processing_time"`
	Errors            []string `json:"errors,omitempty"`
}

// bulkLocationData holds CSV row data for a location
type bulkLocationData struct {
	LocationName  string
	LocationID    string
	DirectoryPath string
	DateRange     string
	SampleRate    int
	FileCount     int
}

// bulkImportStats tracks import statistics for a single cluster
type bulkImportStats struct {
	TotalFiles     int
	ImportedFiles  int
	DuplicateFiles int
	ErrorFiles     int
}

// progressLogger handles writing to both the log file and an internal buffer
type progressLogger struct {
	file   *os.File
	buffer *strings.Builder
}

// Log writes a formatted message with a timestamp to both the log file and the buffer
func (l *progressLogger) Log(format string, args ...any) {
	timestamp := time.Now().Format("2006-01-02 15:04:05")
	message := fmt.Sprintf(format, args...)
	line := fmt.Sprintf("[%s] %s\n", timestamp, message)
	// Write to the file; log write failures are non-fatal for import progress
	if _, err := l.file.WriteString(line); err != nil {
		fmt.Fprintf(os.Stderr, "Warning: log write failed: %v\n", err)
	}
	if err := l.file.Sync(); err != nil {
		fmt.Fprintf(os.Stderr, "Warning: log sync failed: %v\n", err)
	}
	// Also keep in memory for potential error reporting
	l.buffer.WriteString(line)
}

// failOutput sets error details and processing time on the output before returning.
func (o *BulkFileImportOutput) failOutput(errs []string, startTime time.Time) {
	o.Errors = errs
	o.ProcessingTime = time.Since(startTime).String()
}

// BulkFileImport imports WAV files across multiple locations using a CSV specification
func BulkFileImport(ctx context.Context, input BulkFileImportInput) (BulkFileImportOutput, error) {
	startTime := time.Now()
	var output BulkFileImportOutput

	// Open the log file
	logFile, err := os.OpenFile(input.LogFilePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
	if err != nil {
		return output, fmt.Errorf("failed to open log file: %w", err)
	}
	defer func() { _ = logFile.Close() }()

	logger := &progressLogger{
		file:   logFile,
		buffer: &strings.Builder{},
	}
	logger.Log("Starting bulk file import for dataset %s", input.DatasetID)

	// Phase 0: Validate input
	logger.Log("Validating input parameters...")
	if err := bulkValidateInput(input); err != nil {
		logger.Log("ERROR: Validation failed: %v", err)
		output.failOutput([]string{fmt.Sprintf("validation failed: %v", err)}, startTime)
		return output, fmt.Errorf("validation failed: %w", err)
	}
	logger.Log("Validation complete")

	// Phase 1: Read the CSV
	logger.Log("Reading CSV file: %s", input.CSVPath)
	locations, err := bulkReadCSV(input.CSVPath)
	if err != nil {
		logger.Log("ERROR: Failed to read CSV: %v", err)
		output.failOutput([]string{fmt.Sprintf("failed to read CSV: %v", err)}, startTime)
		return output, fmt.Errorf("failed to read CSV: %w", err)
	}
	logger.Log("Loaded %d locations from CSV", len(locations))
	output.TotalLocations = len(locations)

	// Phase 1.5: Validate all location_ids belong to the dataset
	logger.Log("Validating location_ids belong to dataset...")
	if err := bulkValidateLocations(logger, locations, input.DatasetID, db.ResolveDBPath(input.DBPath, "")); err != nil {
		output.failOutput([]string{err.Error()}, startTime)
		return output, err
	}
	logger.Log("Location validation complete")

	// Phase 2: Create/validate clusters
	logger.Log("=== Phase 1: Creating/Validating Clusters ===")
	database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
	if err != nil {
		logger.Log("ERROR: Failed to open database: %v", err)
		output.failOutput([]string{fmt.Sprintf("failed to open database: %v", err)}, startTime)
		return output, fmt.Errorf("failed to open database: %w", err)
	}
	defer database.Close()

	clusterIDMap, created, existing, err := bulkCreateClusters(ctx, database, logger, locations, input.DatasetID)
	if err != nil {
		output.failOutput(output.Errors, startTime)
		return output, err
	}
	output.ClustersCreated = created
	output.ClustersExisting = existing

	// Phase 3: Import files
	logger.Log("=== Phase 2: Importing Files ===")
	fileStats, errs := bulkImportAllFiles(database, logger, locations, clusterIDMap, input.DatasetID)
	output.TotalFilesScanned = fileStats.TotalFiles
	output.FilesImported = fileStats.ImportedFiles
	output.FilesDuplicate = fileStats.DuplicateFiles
	output.FilesError = fileStats.ErrorFiles
	output.Errors = append(output.Errors, errs...)
	if len(errs) > 0 {
		output.ProcessingTime = time.Since(startTime).String()
		return output, fmt.Errorf("failed to import files: %s", errs[0])
	}

	logger.Log("=== Import Complete ===")
	logger.Log("Total files scanned: %d", fileStats.TotalFiles)
	logger.Log("Files imported: %d", fileStats.ImportedFiles)
	logger.Log("Duplicates skipped: %d", fileStats.DuplicateFiles)
	logger.Log("Errors: %d", fileStats.ErrorFiles)
	logger.Log("Processing time: %s", time.Since(startTime).Round(time.Second))

	output.ProcessingTime = time.Since(startTime).String()
	return output, nil
}

// bulkValidateInput validates input parameters
func bulkValidateInput(input BulkFileImportInput) error {
	// Validate the ID format first (fast fail before DB queries)
	if err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {
		return err
	}
	// Verify the CSV file exists
	if _, err := os.Stat(input.CSVPath); err != nil {
		return fmt.Errorf("CSV file not accessible: %w", err)
	}
	// Verify the log file path is writable
	logDir := filepath.Dir(input.LogFilePath)
	if _, err := os.Stat(logDir); err != nil {
		return fmt.Errorf("log file directory not accessible: %w", err)
	}
	// Open the database for validation queries
	database, err := db.OpenReadOnlyDB(db.ResolveDBPath(input.DBPath, ""))
	if err != nil {
		return fmt.Errorf("failed to open database: %w", err)
	}
	defer database.Close()
	// Verify the dataset exists and is structured
	if err := db.ValidateDatasetTypeForImport(database, input.DatasetID); err != nil {
		return err
	}
	return nil
}

// bulkValidateLocationsBelongToDataset validates that all unique location_ids in the CSV belong to the dataset
func bulkValidateLocationsBelongToDataset(dbConn *sql.DB, locations []bulkLocationData, datasetID string) []string {
	var errors []string
	// Collect unique location_ids
	uniqueLocations := make(map[string]bool)
	for _, loc := range locations {
		uniqueLocations[loc.LocationID] = true
	}
	// Validate each unique location_id
	for locationID := range uniqueLocations {
		if err := db.ValidateLocationBelongsToDataset(dbConn, locationID, datasetID); err != nil {
			errors = append(errors, err.Error())
		}
	}
	return errors
}

// bulkValidateLocations validates that all location_ids in the CSV belong to the dataset.
// Returns an error if validation fails.
func bulkValidateLocations(logger *progressLogger, locations []bulkLocationData, datasetID string, dbPath string) error {
	readDB, err := db.OpenReadOnlyDB(dbPath)
	if err != nil {
		logger.Log("ERROR: Failed to open database: %v", err)
		return fmt.Errorf("failed to open database: %w", err)
	}
	locationErrors := bulkValidateLocationsBelongToDataset(readDB, locations, datasetID)
	readDB.Close()
	if len(locationErrors) > 0 {
		for _, locErr := range locationErrors {
			logger.Log("ERROR: %s", locErr)
		}
		return fmt.Errorf("location validation failed: %d location(s) do not belong to dataset %s", len(locationErrors), datasetID)
	}
	return nil
}

// bulkCreateClusters creates or validates clusters for all locations.
// Returns the cluster ID map, counts of created/existing clusters, and any error.
func bulkCreateClusters(ctx context.Context, database *sql.DB, logger *progressLogger, locations []bulkLocationData, datasetID string) (map[string]string, int, int, error) {
	clusterIDMap := make(map[string]string)
	created := 0
	existing := 0

	for i, loc := range locations {
		logger.Log("[%d/%d] Processing location: %s", i+1, len(locations), loc.LocationName)

		var existingClusterID string
		err := database.QueryRow(`
			SELECT id FROM cluster
			WHERE location_id = ? AND name = ? AND active = true`, loc.LocationID, loc.DateRange).Scan(&existingClusterID)

		var clusterID string
		if err == sql.ErrNoRows {
			clusterID, err = bulkCreateCluster(ctx, database, datasetID, loc.LocationID, loc.DateRange, loc.SampleRate)
			if err != nil {
				logger.Log("ERROR: Failed to create cluster for location %s: %v", loc.LocationName, err)
				return nil, 0, 0, fmt.Errorf("failed to create cluster: %w", err)
			}
			logger.Log(" Created cluster: %s", clusterID)
			created++
		} else if err != nil {
			logger.Log("ERROR: Failed to check cluster for location %s: %v", loc.LocationName, err)
			return nil, 0, 0, fmt.Errorf("failed to check cluster: %w", err)
		} else {
			clusterID = existingClusterID
			logger.Log(" Using existing cluster: %s", clusterID)
			existing++
		}

		compositeKey := loc.LocationID + "|" + loc.DateRange
		clusterIDMap[compositeKey] = clusterID
	}
	return clusterIDMap, created, existing, nil
}

// bulkImportAllFiles imports files for all locations using the cluster ID map.
// Returns aggregate stats and any error messages.
func bulkImportAllFiles(database *sql.DB, logger *progressLogger, locations []bulkLocationData, clusterIDMap map[string]string, datasetID string) (bulkImportStats, []string) {
	var total bulkImportStats
	var errs []string

	for i, loc := range locations {
		compositeKey := loc.LocationID + "|" + loc.DateRange
		clusterID, ok := clusterIDMap[compositeKey]
		if !ok {
			continue
		}
		logger.Log("[%d/%d] Importing files for: %s", i+1, len(locations), loc.LocationName)
		logger.Log(" Directory: %s", loc.DirectoryPath)
		if _, err := os.Stat(loc.DirectoryPath); os.IsNotExist(err) {
			logger.Log(" WARNING: Directory not found, skipping")
			continue
		}
		stats, err := bulkImportFilesForCluster(database, logger, loc.DirectoryPath, datasetID, loc.LocationID, clusterID)
		if err != nil {
			errMsg := fmt.Sprintf("Failed to import files for location %s: %v", loc.LocationName, err)
			logger.Log("ERROR: %s", errMsg)
			return total, []string{errMsg}
		}
		logger.Log(" Scanned: %d files", stats.TotalFiles)
		logger.Log(" Imported: %d, Duplicates: %d", stats.ImportedFiles, stats.DuplicateFiles)
		if stats.ErrorFiles > 0 {
			logger.Log(" Errors: %d files", stats.ErrorFiles)
		}
		total.TotalFiles += stats.TotalFiles
		total.ImportedFiles += stats.ImportedFiles
		total.DuplicateFiles += stats.DuplicateFiles
		total.ErrorFiles += stats.ErrorFiles
	}
	return total, errs
}

// bulkReadCSV reads and validates the CSV specification file
func bulkReadCSV(path string) ([]bulkLocationData, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer func() { _ = file.Close() }()

	reader := csv.NewReader(file)
	records, err := reader.ReadAll()
	if err != nil {
		return nil, err
	}
	if len(records) == 0 {
		return nil, fmt.Errorf("CSV file is empty")
	}

	var locations []bulkLocationData
	for i, record := range records {
		if i == 0 {
			continue // Skip header
		}
		if len(record) < 6 {
			return nil, fmt.Errorf("CSV row %d has insufficient columns (expected 6, got %d)", i+1, len(record))
		}
		// Validate required string fields are non-empty
		locationName := strings.TrimSpace(record[0])
		if locationName == "" {
			return nil, fmt.Errorf("empty location_name in row %d", i+1)
		}
		directoryPath := strings.TrimSpace(record[2])
		if directoryPath == "" {
			return nil, fmt.Errorf("empty directory_path in row %d", i+1)
		}
		dateRange := strings.TrimSpace(record[3])
		if dateRange == "" {
			return nil, fmt.Errorf("empty date_range in row %d", i+1)
		}
		// Validate the location_id format (trimmed for consistency with the other columns)
		locationID := strings.TrimSpace(record[1])
		if err := utils.ValidateShortID(locationID, "location_id"); err != nil {
			return nil, fmt.Errorf("invalid location_id in row %d: %v", i+1, err)
		}
		sampleRate, err := strconv.Atoi(record[4])
		if err != nil {
			return nil, fmt.Errorf("invalid sample_rate in row %d: %v", i+1, err)
		}
		// Validate the sample rate is in a reasonable range
		if err := utils.ValidateSampleRate(sampleRate); err != nil {
			return nil, fmt.Errorf("invalid sample_rate in row %d: %v", i+1, err)
		}
		fileCount, err := strconv.Atoi(record[5])
		if err != nil {
			return nil, fmt.Errorf("invalid file_count in row %d: %v", i+1, err)
		}
		locations = append(locations, bulkLocationData{
			LocationName:  locationName,
			LocationID:    locationID,
			DirectoryPath: directoryPath,
			DateRange:     dateRange,
			SampleRate:    sampleRate,
			FileCount:     fileCount,
		})
	}
	return locations, nil
}

// bulkCreateCluster creates a new cluster in the database
func bulkCreateCluster(ctx context.Context, database *sql.DB, datasetID, locationID, name string, sampleRate int) (string, error) {
	// Generate a 12-character nanoid
	clusterID, err := utils.GenerateShortID()
	if err != nil {
		return "", fmt.Errorf("failed to generate cluster ID: %v", err)
	}
	now := time.Now().UTC()

	// Get the location name for the path
	var locationName string
	err = database.QueryRow("SELECT name FROM location WHERE id = ?", locationID).Scan(&locationName)
	if err != nil {
		return "", fmt.Errorf("failed to get location name: %v", err)
	}

	// Normalize the path: replace spaces and path separators
	path := strings.ReplaceAll(locationName, " ", "_")
	path = strings.ReplaceAll(path, "/", "_")

	tx, err := db.BeginLoggedTx(ctx, database, "bulk_file_import")
	if err != nil {
		return "", fmt.Errorf("failed to begin transaction: %w", err)
	}
	defer tx.Rollback()

	_, err = tx.ExecContext(ctx, `
		INSERT INTO cluster (id, dataset_id, location_id, name, path, sample_rate, active, created_at, last_modified)
		VALUES (?, ?, ?, ?, ?, ?, true, ?, ?)`, clusterID, datasetID, locationID, name, path, sampleRate, now, now)
	if err != nil {
		return "", fmt.Errorf("failed to insert cluster: %w", err)
	}
	if err = tx.Commit(); err != nil {
		return "", fmt.Errorf("failed to commit cluster creation: %w", err)
	}
	return clusterID, nil
}

// bulkImportFilesForCluster imports all WAV files for a single cluster
func bulkImportFilesForCluster(database *sql.DB, logger *progressLogger, folderPath, datasetID, locationID, clusterID string) (*bulkImportStats, error) {
	stats := &bulkImportStats{}

	// Check the directory exists
	if _, err := os.Stat(folderPath); os.IsNotExist(err) {
		logger.Log(" WARNING: Directory not found, skipping")
		return stats, nil
	}

	// Import the cluster (same logic as import_files.go)
	logger.Log(" Importing cluster %s", clusterID)
	ctx := context.Background()
	tx, err := db.BeginLoggedTx(ctx, database, "import_audio_files")
	if err != nil {
		return nil, fmt.Errorf("failed to begin transaction: %w", err)
	}
	clusterOutput, err := utils.ImportCluster(database, tx.UnderlyingTx(), utils.ClusterImportInput{
		FolderPath: folderPath,
		DatasetID:  datasetID,
		LocationID: locationID,
		ClusterID:  clusterID,
		Recursive:  true,
	})
	if err != nil {
		tx.Rollback()
		return nil, err
	}
	if err := tx.Commit(); err != nil {
		return nil, fmt.Errorf("transaction commit failed: %w", err)
	}

	// Map to bulk import stats
	stats.TotalFiles = clusterOutput.TotalFiles
	stats.ImportedFiles = clusterOutput.ImportedFiles
	stats.DuplicateFiles = clusterOutput.SkippedFiles
	stats.ErrorFiles = clusterOutput.FailedFiles

	// Log the first 5 file errors
	for i, fileErr := range clusterOutput.Errors {
		if i >= 5 {
			break
		}
		logger.Log(" ERROR: %s: %s", fileErr.FileName, fileErr.Error)
	}

	logger.Log(" Complete: %d imported, %d duplicates, %d errors", stats.ImportedFiles, stats.DuplicateFiles, stats.ErrorFiles)
	return stats, nil
}
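// Usage sketch for BulkFileImport (compiled, not run). The CSV column order
// below is inferred from bulkReadCSV (header row skipped, six columns:
// location_name, location_id, directory_path, date_range, sample_rate,
// file_count); the literal header text, import path, and values are
// assumptions.
package imp_test

import (
	"context"
	"fmt"

	"skraak/imp"
)

func ExampleBulkFileImport() {
	// Hypothetical CSV file contents:
	//
	//   location_name,location_id,directory_path,date_range,sample_rate,file_count
	//   Ponui Island,xyz789uvw012,/data/ponui/2023-10,2023-10-01_2023-10-31,16000,1440
	//
	out, err := imp.BulkFileImport(context.Background(), imp.BulkFileImportInput{
		DBPath:      "skraak.db",       // hypothetical
		DatasetID:   "abc123def456",    // hypothetical
		CSVPath:     "locations.csv",   // hypothetical
		LogFilePath: "bulk_import.log", // hypothetical
	})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("%d/%d files imported across %d locations\n",
		out.FilesImported, out.TotalFilesScanned, out.TotalLocations)
}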
package calls

import (
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"sync/atomic"
)

// parallelResult is the common interface for birda/raven worker results.
type parallelResult interface {
	filePath() string
	getCalls() []ClusteredCall
	wasWritten() bool
	wasSkipped() bool
	getError() error
}

// aggregateStats holds the collected results from a parallel fan-out/fan-in.
type aggregateStats struct {
	calls            []ClusteredCall
	speciesCount     map[string]int
	dataFilesWritten int
	dataFilesSkipped int
	filesProcessed   int
	filesDeleted     int
	firstErr         error
}

// aggregateResults collects results from a channel of parallelResult values,
// handling error tracking, species counting, optional file deletion, and
// progress reporting. Returns the aggregated stats.
func aggregateResults(
	results <-chan parallelResult,
	total int,
	processed *atomic.Int32,
	deleteFiles bool,
	progressHandler func(int, int, string),
) aggregateStats {
	var stats aggregateStats
	stats.speciesCount = make(map[string]int)
	for result := range results {
		if err := result.getError(); err != nil && stats.firstErr == nil {
			stats.firstErr = err
		}
		if result.wasWritten() {
			stats.dataFilesWritten++
		}
		if result.wasSkipped() {
			stats.dataFilesSkipped++
		}
		for _, call := range result.getCalls() {
			stats.calls = append(stats.calls, call)
			stats.speciesCount[call.EbirdCode]++
		}
		stats.filesProcessed++
		stats.maybeDeleteFile(deleteFiles, result)
		if progressHandler != nil {
			current := int(processed.Add(1))
			progressHandler(current, total, filepath.Base(result.filePath()))
		}
	}
	return stats
}

// maybeDeleteFile deletes the source file if requested and it was successfully processed.
func (s *aggregateStats) maybeDeleteFile(deleteFiles bool, result parallelResult) {
	if !deleteFiles || !result.wasWritten() {
		return
	}
	if err := os.Remove(result.filePath()); err != nil {
		if s.firstErr == nil {
			s.firstErr = fmt.Errorf("failed to delete %s: %w", result.filePath(), err)
		}
	} else {
		s.filesDeleted++
	}
}

// sortCallsByFileAndTime sorts calls by filename, then start time.
func sortCallsByFileAndTime(calls []ClusteredCall) {
	sort.Slice(calls, func(i, j int) bool {
		if calls[i].File != calls[j].File {
			return calls[i].File < calls[j].File
		}
		return calls[i].StartTime < calls[j].StartTime
	})
}
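// Minimal sketch of a worker result type satisfying parallelResult, plus the
// fan-out/fan-in wiring into aggregateResults. sketchResult, runSketch, and
// the trivial worker body are hypothetical illustrations, not part of the
// real birda/raven workers.
package calls

import "sync/atomic"

// sketchResult is a hypothetical worker result implementing parallelResult.
type sketchResult struct {
	path    string
	calls   []ClusteredCall
	written bool
	skipped bool
	err     error
}

func (r sketchResult) filePath() string          { return r.path }
func (r sketchResult) getCalls() []ClusteredCall { return r.calls }
func (r sketchResult) wasWritten() bool          { return r.written }
func (r sketchResult) wasSkipped() bool          { return r.skipped }
func (r sketchResult) getError() error           { return r.err }

// runSketch fans worker results into aggregateResults over a channel.
func runSketch(paths []string) aggregateStats {
	results := make(chan parallelResult)
	var processed atomic.Int32
	go func() {
		defer close(results)
		for _, p := range paths {
			// A real worker would analyse p here; this sketch just marks it written.
			results <- sketchResult{path: p, written: true}
		}
	}()
	// No file deletion, no progress reporting in this sketch.
	return aggregateResults(results, len(paths), &processed, false, nil)
}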
package calls

import (
	"fmt"
	"strings"
	"time"

	"github.com/sixdouglas/suncalc"

	"skraak/utils"
)

// IsNightInput defines the input parameters for the isnight tool
type IsNightInput struct {
	FilePath string `json:"file_path"`
	Lat float64 `json:"lat"`
	Lng float64 `json:"lng"`
	Timezone string `json:"timezone,omitempty"`
}

// IsNightOutput defines the output structure for the isnight tool
type IsNightOutput struct {
	FilePath string `json:"file_path"`
	TimestampUTC string `json:"timestamp_utc"`
	SolarNight bool `json:"solar_night"`
	CivilNight bool `json:"civil_night"`
	DiurnalActive bool `json:"diurnal_active"`
	MoonPhase float64 `json:"moon_phase"`
	DurationSec float64 `json:"duration_seconds"`
	TimestampSrc string `json:"timestamp_source"`
	MidpointUTC string `json:"midpoint_utc"`
	SunriseUTC string `json:"sunrise_utc,omitempty"`
	SunsetUTC string `json:"sunset_utc,omitempty"`
	DawnUTC string `json:"dawn_utc,omitempty"`
	DuskUTC string `json:"dusk_utc,omitempty"`
}

// IsNight determines if a WAV file was recorded at night based on its
// metadata timestamp and the given GPS coordinates.
//
// Timestamp resolution order:
//  1. AudioMoth comment (timezone embedded)
//  2. Filename timestamp + timezone offset (requires --timezone)
//  3. File modification time (system local time)
func IsNight(input IsNightInput) (IsNightOutput, error) {
	var output IsNightOutput

	// Step 1: Parse WAV header
	metadata, err := utils.ParseWAVHeader(input.FilePath)
	if err != nil {
		return output, fmt.Errorf("WAV header parsing failed: %w", err)
	}
	output.DurationSec = metadata.Duration

	// Step 2: Resolve timestamp (use file mod time as fallback)
	tsResult, err := utils.ResolveTimestamp(metadata, input.FilePath, input.Timezone, true, nil)
	if err != nil {
		return output, fmt.Errorf("cannot determine recording timestamp: %w", err)
	}

	// Determine timestamp source label
	tsSource := "file_mod_time"
	if tsResult.IsAudioMoth {
		tsSource = "audiomoth_comment"
	} else if utils.HasTimestampFilename(input.FilePath) {
		tsSource = "filename"
	}

	// Step 3: Calculate astronomical data using recording midpoint
	astroData := utils.CalculateAstronomicalData(
		tsResult.Timestamp.UTC(),
		metadata.Duration,
		input.Lat,
		input.Lng,
	)

	// Step 4: Get sun event times for informational output
	midpoint := utils.CalculateMidpointTime(tsResult.Timestamp.UTC(), metadata.Duration)
	sunTimes := suncalc.GetTimes(midpoint, input.Lat, input.Lng)

	output.FilePath = input.FilePath
	output.TimestampUTC = tsResult.Timestamp.UTC().Format(time.RFC3339)
	output.SolarNight = astroData.SolarNight
	output.CivilNight = astroData.CivilNight
	output.MoonPhase = astroData.MoonPhase
	output.TimestampSrc = tsSource
	output.MidpointUTC = midpoint.Format(time.RFC3339)
	populateSunTimes(&output, sunTimes, midpoint)

	return output, nil
}

// sunTimeUTC returns the UTC RFC3339 string for a suncalc event, or "" if absent/zero.
func sunTimeUTC(sunTimes map[suncalc.DayTimeName]suncalc.DayTime, name suncalc.DayTimeName) string {
	if entry, ok := sunTimes[name]; ok && !entry.Value.IsZero() {
		return entry.Value.UTC().Format(time.RFC3339)
	}
	return ""
}

// populateSunTimes fills in sun event times and diurnal status from suncalc results.
func populateSunTimes(output *IsNightOutput, sunTimes map[suncalc.DayTimeName]suncalc.DayTime, midpoint time.Time) {
	// Diurnal: midpoint is between dawn and sunset
	if dawn, ok := sunTimes[suncalc.Dawn]; ok && !dawn.Value.IsZero() {
		if sunset, ok := sunTimes[suncalc.Sunset]; ok && !sunset.Value.IsZero() {
			output.DiurnalActive = !midpoint.Before(dawn.Value) && !midpoint.After(sunset.Value)
		}
	}
	output.SunriseUTC = sunTimeUTC(sunTimes, suncalc.Sunrise)
	output.SunsetUTC = sunTimeUTC(sunTimes, suncalc.Sunset)
	output.DawnUTC = sunTimeUTC(sunTimes, suncalc.Dawn)
	output.DuskUTC = sunTimeUTC(sunTimes, suncalc.Dusk)
}

// String returns a human-readable summary of the isnight result
func (o IsNightOutput) String() string {
	var sb strings.Builder
	fmt.Fprintf(&sb, "File: %s\n", o.FilePath)
	fmt.Fprintf(&sb, "Timestamp (UTC): %s\n", o.TimestampUTC)
	fmt.Fprintf(&sb, "Midpoint (UTC): %s\n", o.MidpointUTC)
	fmt.Fprintf(&sb, "Duration: %.1f seconds\n", o.DurationSec)
	fmt.Fprintf(&sb, "Source: %s\n", o.TimestampSrc)
	fmt.Fprintf(&sb, "Solar night: %v\n", o.SolarNight)
	fmt.Fprintf(&sb, "Civil night: %v\n", o.CivilNight)
	fmt.Fprintf(&sb, "Moon phase: %.2f\n", o.MoonPhase)
	if o.SunriseUTC != "" {
		fmt.Fprintf(&sb, "Sunrise (UTC): %s\n", o.SunriseUTC)
	}
	if o.SunsetUTC != "" {
		fmt.Fprintf(&sb, "Sunset (UTC): %s\n", o.SunsetUTC)
	}
	if o.DawnUTC != "" {
		fmt.Fprintf(&sb, "Dawn (UTC): %s\n", o.DawnUTC)
	}
	if o.DuskUTC != "" {
		fmt.Fprintf(&sb, "Dusk (UTC): %s\n", o.DuskUTC)
	}
	return sb.String()
}
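// Usage sketch (illustrative, not part of the original source): calling IsNight
// directly from Go. The file path and coordinates below are hypothetical
// placeholder values; only the API defined above is used.
//
//	out, err := IsNight(IsNightInput{
//		FilePath: "recordings/20260221_203004.WAV", // hypothetical path
//		Lat:      -45.3,
//		Lng:      167.4,
//	})
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Print(out) // human-readable summary via the String method above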
package calls

import (
	"sort"
	"strings"

	"skraak/utils"
)

// CallsSummariseInput defines the input for the calls-summarise tool
type CallsSummariseInput struct {
	Folder string `json:"folder"`
	Brief bool `json:"brief"`
	Filter string `json:"filter,omitempty"`
}

// CallsSummariseOutput defines the output for the calls-summarise tool
type CallsSummariseOutput struct {
	Segments []SegmentSummary `json:"segments"`
	Folder string `json:"folder"`
	DataFilesRead int `json:"data_files_read"`
	DataFilesSkipped []string `json:"data_files_skipped"`
	TotalSegments int `json:"total_segments"`
	Filters map[string]FilterStats `json:"filters"`
	ReviewStatus ReviewStatus `json:"review_status"`
	Operators []string `json:"operators"`
	Reviewers []string `json:"reviewers"`
	Error *string `json:"error,omitempty"`
}

// SegmentSummary represents a single segment in the output
type SegmentSummary struct {
	File string `json:"file"`
	StartTime float64 `json:"start_time"`
	EndTime float64 `json:"end_time"`
	Labels []LabelSummary `json:"labels"`
}

// LabelSummary represents a label in the output (omits empty fields)
type LabelSummary struct {
	Filter string `json:"filter"`
	Certainty int `json:"certainty"`
	Species string `json:"species"`
	CallType string `json:"calltype,omitempty"`
	Comment string `json:"comment,omitempty"`
	Bookmark bool `json:"bookmark,omitempty"`
}

// FilterStats contains per-filter statistics
type FilterStats struct {
	Segments int `json:"segments"`
	Species map[string]int `json:"species"`
	Calltypes map[string]map[string]int `json:"calltypes,omitempty"` // species -> calltype -> count
}

// ReviewStatus contains review progress statistics
type ReviewStatus struct {
	Unreviewed int `json:"unreviewed"` // certainty < 100
	Confirmed int `json:"confirmed"` // certainty = 100
	DontKnow int `json:"dont_know"` // certainty = 0
	WithCallType int `json:"with_calltype"`
	WithComments int `json:"with_comments"`
	Bookmarked int `json:"bookmarked"`
}

// CallsSummarise reads all .data files in a folder and produces a summary
func CallsSummarise(input CallsSummariseInput) (CallsSummariseOutput, error) {
	var output CallsSummariseOutput

	// Find all .data files
	filePaths, err := utils.FindDataFiles(input.Folder)
	if err != nil {
		errMsg := err.Error()
		output.Error = &errMsg
		return output, err
	}

	// Initialize empty slices/maps (avoid null in JSON)
	output.Segments = make([]SegmentSummary, 0)
	output.Folder = input.Folder
	output.Filters = make(map[string]FilterStats)
	output.Operators = make([]string, 0)
	output.Reviewers = make([]string, 0)
	output.DataFilesSkipped = make([]string, 0)

	if len(filePaths) == 0 {
		return output, nil
	}

	// Track unique operators and reviewers
	operatorSet := make(map[string]bool)
	reviewerSet := make(map[string]bool)

	summariseFiles(filePaths, input, &output, operatorSet, reviewerSet)

	// Count segments for total
	if input.Brief {
		for _, fs := range output.Filters {
			output.TotalSegments += fs.Segments
		}
	} else {
		output.TotalSegments = len(output.Segments)
	}

	finaliseSummary(&output, operatorSet, reviewerSet, input.Brief)
	return output, nil
}

// summariseFiles processes all data files, populating output stats
func summariseFiles(filePaths []string, input CallsSummariseInput, output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool) {
	for _, path := range filePaths {
		df, err := utils.ParseDataFile(path)
		if err != nil {
			output.DataFilesSkipped = append(output.DataFilesSkipped, path)
			continue
		}
		output.DataFilesRead++
		trackMeta(df.Meta, operatorSet, reviewerSet)

		var relPath string
		if !input.Brief {
			relPath = extractRelativePath(input.Folder, path)
		}

		for _, seg := range df.Segments {
			filteredLabels := filterLabels(seg.Labels, input.Filter)
			if input.Filter != "" && len(filteredLabels) == 0 {
				continue
			}
			updateStatsFromLabels(filteredLabels, output)
			if !input.Brief {
				output.Segments = append(output.Segments, SegmentSummary{
					File: relPath,
					StartTime: seg.StartTime,
					EndTime: seg.EndTime,
					Labels: buildLabelSummaries(filteredLabels),
				})
			}
		}
	}
}

// trackMeta records operator and reviewer from file metadata
func trackMeta(meta *utils.DataMeta, operatorSet, reviewerSet map[string]bool) {
	if meta == nil {
		return
	}
	if meta.Operator != "" {
		operatorSet[meta.Operator] = true
	}
	if meta.Reviewer != "" {
		reviewerSet[meta.Reviewer] = true
	}
}

// filterLabels returns labels matching the filter, or all labels if filter is empty
func filterLabels(labels []*utils.Label, filter string) []*utils.Label {
	if filter == "" {
		return labels
	}
	var filtered []*utils.Label
	for _, l := range labels {
		if l.Filter == filter {
			filtered = append(filtered, l)
		}
	}
	return filtered
}

// buildLabelSummaries converts labels to label summaries
func buildLabelSummaries(labels []*utils.Label) []LabelSummary {
	var summaries []LabelSummary
	for _, l := range labels {
		ls := LabelSummary{
			Filter: l.Filter,
			Certainty: l.Certainty,
			Species: l.Species,
		}
		if l.CallType != "" {
			ls.CallType = l.CallType
		}
		if l.Comment != "" {
			ls.Comment = l.Comment
		}
		if l.Bookmark {
			ls.Bookmark = true
		}
		summaries = append(summaries, ls)
	}
	return summaries
}

// updateStatsFromLabels updates filter stats and review status from a set of labels
func updateStatsFromLabels(labels []*utils.Label, output *CallsSummariseOutput) {
	for _, l := range labels {
		updateFilterStats(l, output)
		updateReviewStatus(l, output)
	}
}

// updateFilterStats increments filter-level statistics for a single label
func updateFilterStats(l *utils.Label, output *CallsSummariseOutput) {
	fs, exists := output.Filters[l.Filter]
	if !exists {
		fs = FilterStats{
			Segments: 0,
			Species: make(map[string]int),
			Calltypes: make(map[string]map[string]int),
		}
	}
	fs.Segments++
	fs.Species[l.Species]++
	if l.CallType != "" {
		if fs.Calltypes[l.Species] == nil {
			fs.Calltypes[l.Species] = make(map[string]int)
		}
		fs.Calltypes[l.Species][l.CallType]++
	}
	output.Filters[l.Filter] = fs
}

// updateReviewStatus increments review status counters for a single label
func updateReviewStatus(l *utils.Label, output *CallsSummariseOutput) {
	switch l.Certainty {
	case 100:
		output.ReviewStatus.Confirmed++
	case 0:
		output.ReviewStatus.DontKnow++
	default:
		output.ReviewStatus.Unreviewed++
	}
	if l.CallType != "" {
		output.ReviewStatus.WithCallType++
	}
	if l.Comment != "" {
		output.ReviewStatus.WithComments++
	}
	if l.Bookmark {
		output.ReviewStatus.Bookmarked++
	}
}

// finaliseSummary sorts output, cleans empty maps, and converts sets to sorted slices
func finaliseSummary(output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool, brief bool) {
	// Clean up empty calltypes maps
	for filter, fs := range output.Filters {
		if len(fs.Calltypes) == 0 {
			fs.Calltypes = nil
			output.Filters[filter] = fs
		}
	}

	// Convert sets to sorted slices
	for op := range operatorSet {
		output.Operators = append(output.Operators, op)
	}
	for r := range reviewerSet {
		output.Reviewers = append(output.Reviewers, r)
	}
	sort.Strings(output.Operators)
	sort.Strings(output.Reviewers)

	// Sort segments by file, then start time
	if !brief {
		sort.Slice(output.Segments, func(i, j int) bool {
			if output.Segments[i].File != output.Segments[j].File {
				return output.Segments[i].File < output.Segments[j].File
			}
			return output.Segments[i].StartTime < output.Segments[j].StartTime
		})
	}
}

// extractRelativePath extracts the audio filename from a .data file path,
// e.g. "/folder/tx51_LISTENING_20260221_203004.WAV.data" -> "tx51_LISTENING_20260221_203004.WAV".
// Preserves the original case of the extension as-is.
func extractRelativePath(folder, dataPath string) string {
	// Get the filename
	filename := dataPath
	if idx := strings.LastIndex(dataPath, "/"); idx >= 0 {
		filename = dataPath[idx+1:]
	}
	// Remove .data extension, preserve everything else
	return strings.TrimSuffix(filename, ".data")
}
package calls

import (
	"fmt"
	"os"
	"strings"

	"skraak/utils"
)

// CallsShowImagesInput defines the input for the show-images tool
type CallsShowImagesInput struct {
	DataFilePath string `json:"data_file_path"`
	Color bool `json:"color"`
	ImageSize int `json:"image_size"`
	Sixel bool `json:"sixel"`
	ITerm bool `json:"iterm"`
}

// CallsShowImagesOutput defines the output for the show-images tool
type CallsShowImagesOutput struct {
	SegmentsShown int `json:"segments_shown"`
	WavFile string `json:"wav_file"`
	Error string `json:"error,omitempty"`
}

// CallsShowImages reads a .data file and displays spectrogram images for each segment
func CallsShowImages(input CallsShowImagesInput) (CallsShowImagesOutput, error) {
	var output CallsShowImagesOutput

	// Validate file exists
	if _, err := os.Stat(input.DataFilePath); os.IsNotExist(err) {
		output.Error = fmt.Sprintf("File not found: %s", input.DataFilePath)
		return output, fmt.Errorf("%s", output.Error)
	}

	// Derive WAV file path (strip .data suffix)
	wavPath := strings.TrimSuffix(input.DataFilePath, ".data")
	output.WavFile = wavPath

	// Check WAV file exists
	if _, err := os.Stat(wavPath); os.IsNotExist(err) {
		output.Error = fmt.Sprintf("WAV file not found: %s", wavPath)
		return output, fmt.Errorf("%s", output.Error)
	}

	// Parse .data file (includes labels for future filtering)
	dataFile, err := utils.ParseDataFile(input.DataFilePath)
	if err != nil {
		output.Error = err.Error()
		return output, fmt.Errorf("%s", output.Error)
	}
	if len(dataFile.Segments) == 0 {
		output.Error = "No segments found in .data file"
		return output, fmt.Errorf("%s", output.Error)
	}

	// Resolve image size
	imgSize := input.ImageSize
	if imgSize == 0 {
		imgSize = utils.SpectrogramDisplaySize
	}

	// Select graphics protocol
	protocol := utils.ProtocolKitty
	if input.ITerm {
		protocol = utils.ProtocolITerm
	} else if input.Sixel {
		protocol = utils.ProtocolSixel
	}

	// Generate spectrogram for each segment and output
	for i, seg := range dataFile.Segments {
		// Generate spectrogram image
		img, err := utils.GenerateSegmentSpectrogram(input.DataFilePath, seg.StartTime, seg.EndTime, input.Color, imgSize)
		if err != nil || img == nil {
			continue
		}

		// Print segment info
		labelInfo := formatSegmentLabels(seg.Labels)
		fmt.Fprintf(os.Stderr, "Segment %d: %.1fs - %.1fs (%.1fs)%s\n",
			i+1, seg.StartTime, seg.EndTime, seg.EndTime-seg.StartTime, labelInfo)

		// Write to stdout via terminal graphics protocol
		if err := utils.WriteImage(img, os.Stdout, protocol); err != nil {
			output.Error = fmt.Sprintf("Failed to write image: %v", err)
			return output, fmt.Errorf("%s", output.Error)
		}
		fmt.Println() // Newline after image
	}

	output.SegmentsShown = len(dataFile.Segments)
	return output, nil
}

// formatSegmentLabels formats labels for display in segment info
func formatSegmentLabels(labels []*utils.Label) string {
	if len(labels) == 0 {
		return ""
	}
	var parts []string
	for _, l := range labels {
		part := l.Species
		if l.CallType != "" {
			part += "/" + l.CallType
		}
		if l.Filter != "" {
			part += " [" + l.Filter + "]"
		}
		parts = append(parts, part)
	}
	return " " + strings.Join(parts, ", ")
}
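// Usage sketch (illustrative, not part of the original source): displaying all
// segments of one recording. Kitty is the default protocol when both Sixel and
// ITerm are false, per the selection logic above. The .data path is a
// hypothetical placeholder.
//
//	out, err := CallsShowImages(CallsShowImagesInput{
//		DataFilePath: "recordings/20260221_203004.WAV.data",
//		Color:        true,
//	})
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Fprintf(os.Stderr, "shown: %d segments of %s\n", out.SegmentsShown, out.WavFile)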
package calls

import (
	"encoding/json"
	"os"
	"path/filepath"
	"testing"

	"skraak/utils"
)

func TestPushCertaintyPromotesMatchingLabels(t *testing.T) {
	tempDir := t.TempDir()

	// File with two Kiwi segments: certainty=90 and certainty=70
	file1 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]], [10, 20, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`
	file1Path := filepath.Join(tempDir, "file1.data")
	if err := os.WriteFile(file1Path, []byte(file1), 0644); err != nil {
		t.Fatal(err)
	}

	// File with one Tomtit at certainty=90 (must not be promoted when species=Kiwi)
	file2 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
	file2Path := filepath.Join(tempDir, "file2.data")
	if err := os.WriteFile(file2Path, []byte(file2), 0644); err != nil {
		t.Fatal(err)
	}

	result, err := PushCertainty(PushCertaintyConfig{
		Folder: tempDir,
		Species: "Kiwi",
		Reviewer: "TestReviewer",
	})
	if err != nil {
		t.Fatal(err)
	}
	if result.SegmentsUpdated != 1 {
		t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)
	}
	if result.FilesUpdated != 1 {
		t.Errorf("expected 1 file updated, got %d", result.FilesUpdated)
	}

	// Verify file1: certainty=90 Kiwi → 100, certainty=70 Kiwi → unchanged
	df, err := utils.ParseDataFile(file1Path)
	if err != nil {
		t.Fatal(err)
	}
	if df.Segments[0].Labels[0].Certainty != 100 {
		t.Errorf("expected certainty=100, got %d", df.Segments[0].Labels[0].Certainty)
	}
	if df.Segments[1].Labels[0].Certainty != 70 {
		t.Errorf("expected certainty=70 unchanged, got %d", df.Segments[1].Labels[0].Certainty)
	}
	if df.Meta.Reviewer != "TestReviewer" {
		t.Errorf("expected reviewer=TestReviewer, got %q", df.Meta.Reviewer)
	}

	// Verify Tomtit file was not modified
	df2, err := utils.ParseDataFile(file2Path)
	if err != nil {
		t.Fatal(err)
	}
	if df2.Segments[0].Labels[0].Certainty != 90 {
		t.Errorf("Tomtit certainty should be unchanged at 90, got %d", df2.Segments[0].Labels[0].Certainty)
	}
}

func TestPushCertaintyFilterScope(t *testing.T) {
	tempDir := t.TempDir()

	// Segment has two labels from different filters, both Kiwi certainty=90
	data := []any{
		map[string]any{"Operator": "test"},
		[]any{0.0, 10.0, 100.0, 1000.0, []any{
			map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-a"},
			map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-b"},
		}},
	}
	raw, _ := json.Marshal(data)
	filePath := filepath.Join(tempDir, "file1.data")
	if err := os.WriteFile(filePath, raw, 0644); err != nil {
		t.Fatal(err)
	}

	// Push only model-a
	result, err := PushCertainty(PushCertaintyConfig{
		Folder: tempDir,
		Filter: "model-a",
		Species: "Kiwi",
		Reviewer: "TestReviewer",
	})
	if err != nil {
		t.Fatal(err)
	}
	if result.SegmentsUpdated != 1 {
		t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)
	}

	// Verify only model-a label was promoted; model-b stays at 90
	df, err := utils.ParseDataFile(filePath)
	if err != nil {
		t.Fatal(err)
	}
	for _, label := range df.Segments[0].Labels {
		if label.Filter == "model-a" && label.Certainty != 100 {
			t.Errorf("model-a label should be 100, got %d", label.Certainty)
		}
		if label.Filter == "model-b" && label.Certainty != 90 {
			t.Errorf("model-b label should be unchanged at 90, got %d", label.Certainty)
		}
	}
}
package calls

import (
	"fmt"

	"skraak/utils"
)

// PushCertaintyConfig holds the configuration for push-certainty
type PushCertaintyConfig struct {
	Folder string
	File string
	Filter string
	Species string
	CallType string
	Night bool
	Day bool
	Lat float64
	Lng float64
	Timezone string
	Reviewer string
}

// PushCertaintyResult holds the result of push-certainty
type PushCertaintyResult struct {
	SegmentsUpdated int `json:"segments_updated"`
	FilesUpdated int `json:"files_updated"`
	TimeFilteredCount int `json:"time_filtered_count"`
}

// PushCertainty promotes all certainty=90 segments matching the filter scope to certainty=100.
// Uses identical filtering logic to LoadDataFiles so the scope matches calls classify exactly.
func PushCertainty(config PushCertaintyConfig) (*PushCertaintyResult, error) {
	state, err := LoadDataFiles(ClassifyConfig{
		Folder: config.Folder,
		File: config.File,
		Filter: config.Filter,
		Species: config.Species,
		CallType: config.CallType,
		Certainty: 90,
		Sample: -1,
		Night: config.Night,
		Day: config.Day,
		Lat: config.Lat,
		Lng: config.Lng,
		Timezone: config.Timezone,
	})
	if err != nil {
		return nil, err
	}

	var segsUpdated, filesUpdated int
	for i, df := range state.DataFiles {
		changed := false
		for _, seg := range state.FilteredSegs()[i] {
			for _, label := range seg.Labels {
				if labelMatchesPush(label, config.Filter, config.Species, config.CallType) {
					label.Certainty = 100
					changed = true
					segsUpdated++
				}
			}
		}
		if changed {
			df.Meta.Reviewer = config.Reviewer
			if err := df.Write(df.FilePath); err != nil {
				return nil, fmt.Errorf("write %s: %w", df.FilePath, err)
			}
			filesUpdated++
		}
	}

	return &PushCertaintyResult{
		SegmentsUpdated: segsUpdated,
		FilesUpdated: filesUpdated,
		TimeFilteredCount: state.TimeFilteredCount,
	}, nil
}

// labelMatchesPush returns true if the label matches the push scope and has certainty=90.
// Certainty is already guaranteed by LoadDataFiles, but we re-check to target only the
// specific label that matched (a segment may carry labels from multiple filters).
func labelMatchesPush(label *utils.Label, filter, species, callType string) bool {
	if filter != "" && label.Filter != filter {
		return false
	}
	if species != "" && label.Species != species {
		return false
	}
	if callType != "" && label.CallType != callType {
		return false
	}
	return label.Certainty == 90
}
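// Usage sketch (illustrative, not part of the original source): promoting all
// certainty=90 Kiwi/Duet labels from one model filter within night-time
// recordings. The folder, filter name, and coordinates are hypothetical
// placeholder values.
//
//	res, err := PushCertainty(PushCertaintyConfig{
//		Folder:   "/data/2026-02", // hypothetical folder
//		Filter:   "opensoundscape-kiwi-1.5",
//		Species:  "Kiwi",
//		CallType: "Duet",
//		Night:    true,
//		Lat:      -45.3,
//		Lng:      167.4,
//		Reviewer: "David",
//	})
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Printf("%d labels promoted across %d files\n", res.SegmentsUpdated, res.FilesUpdated)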
package calls

import (
	"path/filepath"
	"testing"

	"skraak/utils"
)

// helpers

func seg(start, end float64, labels ...*utils.Label) *utils.Segment {
	return &utils.Segment{
		StartTime: start,
		EndTime: end,
		FreqLow: 100,
		FreqHigh: 8000,
		Labels: labels,
	}
}

func lbl(filter, species, calltype string, certainty int) *utils.Label {
	return &utils.Label{
		Filter: filter,
		Species: species,
		CallType: calltype,
		Certainty: certainty,
	}
}

func writeFile(t *testing.T, segs ...*utils.Segment) string {
	t.Helper()
	dir := t.TempDir()
	path := filepath.Join(dir, "test.data")
	df := &utils.DataFile{
		Meta: &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600},
		Segments: segs,
	}
	if err := df.Write(path); err != nil {
		t.Fatalf("write fixture: %v", err)
	}
	return path
}

func readFile(t *testing.T, path string) *utils.DataFile {
	t.Helper()
	df, err := utils.ParseDataFile(path)
	if err != nil {
		t.Fatalf("parse %s: %v", path, err)
	}
	return df
}

// findLabel returns the label with matching filter and time on the parsed file, or nil.
func findLabel(df *utils.DataFile, filter string, start, end float64) *utils.Label {
	for _, s := range df.Segments {
		if s.StartTime != start || s.EndTime != end {
			continue
		}
		for _, l := range s.Labels {
			if l.Filter == filter {
				return l
			}
		}
	}
	return nil
}

const (
	fFrom = "opensoundscape-kiwi-1.2"
	fTo = "opensoundscape-kiwi-1.5"
)

func TestPropagate_HappyPathSingle(t *testing.T) {
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v (%s)", err, out.Error)
	}
	if out.Propagated != 1 || out.TargetsExamined != 1 || out.SkippedConflict != 0 || out.SkippedNoOverlap != 0 {
		t.Fatalf("counts wrong: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.Species != "Kiwi" || target.CallType != "Male" || target.Certainty != 90 {
		t.Errorf("target not updated correctly: species=%q calltype=%q cert=%d", target.Species, target.CallType, target.Certainty)
	}
	if df.Meta.Reviewer != "Skraak" {
		t.Errorf("reviewer = %q, want Skraak", df.Meta.Reviewer)
	}
}

func TestPropagate_NoOverlap(t *testing.T) {
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 0 || out.TargetsExamined != 1 || out.SkippedNoOverlap != 1 {
		t.Fatalf("counts wrong: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 500, 525)
	if target.Certainty != 70 {
		t.Errorf("target should not be modified, cert=%d", target.Certainty)
	}
	if df.Meta.Reviewer != "David" {
		t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
	}
}

func TestPropagate_SourceWrongSpecies_Ignored(t *testing.T) {
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Weka", "", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 0 || out.SkippedNoOverlap != 1 {
		t.Fatalf("counts wrong: %+v", out)
	}
}

func TestPropagate_SourceWrongCertainty_Ignored(t *testing.T) {
	// cert=70 and cert=0 source labels must NOT count as sources.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 70)),
		seg(200, 225, lbl(fFrom, "Don't Know", "", 0)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
		seg(200, 225, lbl(fTo, "Kiwi", "Male", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 0 || out.SkippedNoOverlap != 2 {
		t.Fatalf("counts wrong: %+v", out)
	}
}

func TestPropagate_SourceWrongFilter_Ignored(t *testing.T) {
	path := writeFile(t,
		seg(100, 125, lbl("some-other-filter", "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if !out.FiltersMissing || out.Propagated != 0 || out.TargetsExamined != 0 {
		t.Fatalf("expected FiltersMissing=true with zero counts, got: %+v", out)
	}
}

func TestPropagate_TargetCert100_NotTouched(t *testing.T) {
	// Target with cert=100 is human-verified — must NOT be overwritten.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Male", 100)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.TargetsExamined != 0 || out.Propagated != 0 {
		t.Fatalf("cert=100 target must not be examined: %+v", out)
	}
	df := readFile(t, path)
	if df.Meta.Reviewer != "David" {
		t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
	}
}

func TestPropagate_TargetCert90_NotTouched(t *testing.T) {
	// Target with cert=90 (already propagated earlier) must NOT be re-propagated.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Female", 90)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.TargetsExamined != 0 || out.Propagated != 0 {
		t.Fatalf("cert=90 target must not be examined: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	if target.Certainty != 90 || target.CallType != "Female" {
		t.Errorf("cert=90 target was modified: %+v", target)
	}
}

func TestPropagate_TargetCert0_Propagated(t *testing.T) {
	// Target at cert=0 ("Don't Know" / "Noise") SHOULD be propagated when an
	// overlapping cert=100 source exists — rescues labels from the noise bucket
	// so they surface for review even if occasionally wrong.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Don't Know", "", 0)),
		seg(200, 225, lbl(fFrom, "Kiwi", "Female", 100)),
		seg(200, 225, lbl(fTo, "Noise", "", 0)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.TargetsExamined != 2 || out.Propagated != 2 {
		t.Fatalf("cert=0 targets must be propagated: %+v", out)
	}
	df := readFile(t, path)
	for _, c := range []struct {
		start, end float64
		calltype string
	}{{100, 125, "Male"}, {200, 225, "Female"}} {
		l := findLabel(df, fTo, c.start, c.end)
		if l == nil || l.Species != "Kiwi" || l.CallType != c.calltype || l.Certainty != 90 {
			t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", c.start, c.end, l, c.calltype)
		}
	}
}

func TestPropagate_MultipleSourcesAgree(t *testing.T) {
	// Two overlapping sources with same calltype → propagate.
	path := writeFile(t,
		seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(105, 120, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 1 || out.SkippedConflict != 0 {
		t.Fatalf("counts wrong: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	if target.CallType != "Male" {
		t.Errorf("calltype should be Male, got %q", target.CallType)
	}
}

func TestPropagate_MultipleSourcesConflict(t *testing.T) {
	// Two overlapping sources with different calltypes → conflict, skip, report.
	path := writeFile(t,
		seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(115, 120, lbl(fFrom, "Kiwi", "Female", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 0 || out.SkippedConflict != 1 {
		t.Fatalf("expected 1 conflict skip: %+v", out)
	}
	if len(out.Conflicts) != 1 {
		t.Fatalf("expected 1 conflict report, got %d", len(out.Conflicts))
	}
	if out.Conflicts[0].TargetStart != 100 || out.Conflicts[0].TargetEnd != 125 {
		t.Errorf("conflict target wrong: %+v", out.Conflicts[0])
	}
	if len(out.Conflicts[0].SourceChoices) != 2 {
		t.Errorf("expected 2 source choices, got %d", len(out.Conflicts[0].SourceChoices))
	}
	// Target must NOT be modified.
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	if target.CallType != "Duet" || target.Certainty != 70 {
		t.Errorf("conflicted target was modified: %+v", target)
	}
	if df.Meta.Reviewer != "David" {
		t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
	}
}

func TestPropagate_EmptyCallTypePropagates(t *testing.T) {
	// Source with empty calltype → target gets empty calltype.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Male", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 1 {
		t.Fatalf("expected propagated=1: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	if target.CallType != "" {
		t.Errorf("calltype should be cleared, got %q", target.CallType)
	}
	if target.Species != "Kiwi" || target.Certainty != 90 {
		t.Errorf("target fields wrong: %+v", target)
	}
}

func TestPropagate_SpeciesOverride(t *testing.T) {
	// Target species was different from --species; must be overwritten.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Don't Know", "", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 1 {
		t.Fatalf("expected propagated=1: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	if target.Species != "Kiwi" || target.CallType != "Male" || target.Certainty != 90 {
		t.Errorf("target not overwritten correctly: %+v", target)
	}
}

func TestPropagate_OverlapBoundaryExclusive(t *testing.T) {
	// Segments touching at a point (src ends exactly where tgt starts) do NOT overlap.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 0 || out.SkippedNoOverlap != 1 {
		t.Fatalf("touching boundary must not count as overlap: %+v", out)
	}
}

func TestPropagate_OverlapPartial(t *testing.T) {
	// 1-second overlap is enough.
	path := writeFile(t,
		seg(100, 126, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 1 {
		t.Fatalf("expected propagated=1: %+v", out)
	}
}

func TestPropagate_SupersetEitherDirection(t *testing.T) {
	// Source engulfs target.
	path1 := writeFile(t,
		seg(100, 200, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(110, 150, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	if out, _ := CallsPropagate(CallsPropagateInput{File: path1, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}); out.Propagated != 1 {
		t.Errorf("source-engulfs-target: %+v", out)
	}
	// Target engulfs source.
	path2 := writeFile(t,
		seg(110, 150, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 200, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	if out, _ := CallsPropagate(CallsPropagateInput{File: path2, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}); out.Propagated != 1 {
		t.Errorf("target-engulfs-source: %+v", out)
	}
}

func TestPropagate_MissingFlags(t *testing.T) {
	cases := []struct {
		name string
		in CallsPropagateInput
	}{
		{"no file", CallsPropagateInput{FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}},
		{"no from", CallsPropagateInput{File: "x", ToFilter: fTo, Species: "Kiwi"}},
		{"no to", CallsPropagateInput{File: "x", FromFilter: fFrom, Species: "Kiwi"}},
		{"no species", CallsPropagateInput{File: "x", FromFilter: fFrom, ToFilter: fTo}},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			_, err := CallsPropagate(c.in)
			if err == nil {
				t.Errorf("expected error")
			}
		})
	}
}

func TestPropagate_SameFromAndTo(t *testing.T) {
	_, err := CallsPropagate(CallsPropagateInput{
		File: "x", FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi",
	})
	if err == nil {
		t.Error("expected error when --from == --to")
	}
}

func TestPropagate_NonexistentFile(t *testing.T) {
	_, err := CallsPropagate(CallsPropagateInput{
		File: "/nonexistent/path.data", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err == nil {
		t.Error("expected error for nonexistent file")
	}
}

func TestPropagate_RealisticMixed(t *testing.T) {
	// Mimics the 20260228_211500.WAV.data case: cert=0 "Don't Know" and cert=100 Kiwi sources
	// coexist; only cert=100 Kiwi gets propagated.
	path := writeFile(t,
		// Sources (kiwi-1.2)
		seg(45, 52.5, lbl(fFrom, "Don't Know", "", 0)),
		seg(142.5, 177.5, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(195, 217.5, lbl(fFrom, "Don't Know", "", 0)),
		seg(647.5, 682.5, lbl(fFrom, "Kiwi", "Female", 100)),
		seg(815, 855, lbl(fFrom, "Kiwi", "Duet", 100)),
		// Targets (kiwi-1.5)
		seg(147.5, 167.5, lbl(fTo, "Kiwi", "Male", 70)),
		seg(647.5, 672.5, lbl(fTo, "Kiwi", "Female", 70)),
		seg(815, 852.5, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.TargetsExamined != 3 || out.Propagated != 3 || out.SkippedConflict != 0 {
		t.Fatalf("counts wrong: %+v", out)
	}
	df := readFile(t, path)
	expect := []struct {
		start, end float64
		calltype string
	}{
		{147.5, 167.5, "Male"},
		{647.5, 672.5, "Female"},
		{815, 852.5, "Duet"},
	}
	for _, e := range expect {
		l := findLabel(df, fTo, e.start, e.end)
		if l == nil || l.Certainty != 90 || l.CallType != e.calltype || l.Species != "Kiwi" {
			t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", e.start, e.end, l, e.calltype)
		}
	}
}

func TestPropagate_NoWriteIfNothingChanged(t *testing.T) {
	// File with only non-target segments should not be rewritten (reviewer unchanged).
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
	)
	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 0 || out.TargetsExamined != 0 {
		t.Fatalf("expected no activity: %+v", out)
	}
	df := readFile(t, path)
	if df.Meta.Reviewer != "David" {
		t.Errorf("reviewer should not be touched, got %q", df.Meta.Reviewer)
	}
}

// writeFileAt is like writeFile but puts the file inside an existing dir
// with a caller-provided basename (must end in .data).
func writeFileAt(t *testing.T, dir, base string, segs ...*utils.Segment) string {
	t.Helper()
	path := filepath.Join(dir, base)
	df := &utils.DataFile{
		Meta: &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600},
		Segments: segs,
	}
	if err := df.Write(path); err != nil {
		t.Fatalf("write fixture: %v", err)
	}
	return path
}

// assertPropagateStats checks output stats against expected values.
func assertPropagateStats(t *testing.T, got, want CallsPropagateFolderOutput) {
	t.Helper()
	checks := []struct {
		name string
		got int
		want int
	}{
		{"FilesTotal", got.FilesTotal, want.FilesTotal},
		{"FilesWithBothFilters", got.FilesWithBothFilters, want.FilesWithBothFilters},
		{"FilesSkippedNoFilter", got.FilesSkippedNoFilter, want.FilesSkippedNoFilter},
		{"FilesChanged", got.FilesChanged, want.FilesChanged},
		{"FilesErrored", got.FilesErrored, want.FilesErrored},
		{"TargetsExamined", got.TargetsExamined, want.TargetsExamined},
		{"Propagated", got.Propagated, want.Propagated},
		{"SkippedNoOverlap", got.SkippedNoOverlap, want.SkippedNoOverlap},
	}
	for _, c := range checks {
		if c.got != c.want {
			t.Errorf("%s: got %d, want %d", c.name, c.got, c.want)
		}
	}
}

func TestPropagateFolder_AggregatesAndSkipsMissing(t *testing.T) {
	dir := t.TempDir()
	// File A: both filters present, one clean propagation.
	aPath := writeFileAt(t, dir, "a.wav.data",
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	// File B: only target filter — missing source, must be skipped silently.
	bPath := writeFileAt(t, dir, "b.wav.data",
		seg(200, 225, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	// File C: only source filter — missing target, must be skipped silently.
	writeFileAt(t, dir, "c.wav.data",
		seg(300, 325, lbl(fFrom, "Kiwi", "Male", 100)),
	)
	// File D: both filters, but no overlap → targets examined, none propagated.
	dPath := writeFileAt(t, dir, "d.wav.data",
		seg(400, 425, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),
	)

	out, err := CallsPropagateFolder(CallsPropagateFolderInput{
		Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	assertPropagateStats(t, out, CallsPropagateFolderOutput{
		FilesTotal: 4,
		FilesWithBothFilters: 2,
		FilesSkippedNoFilter: 2,
		FilesChanged: 1,
		FilesErrored: 0,
		TargetsExamined: 2,
		Propagated: 1,
		SkippedNoOverlap: 1,
	})

	t.Run("file_a_propagated", func(t *testing.T) {
		aDf := readFile(t, aPath)
		if aDf.Meta.Reviewer != "Skraak" {
			t.Errorf("reviewer: got %q, want Skraak", aDf.Meta.Reviewer)
		}
		if l := findLabel(aDf, fTo, 100, 125); l == nil || l.Certainty != 90 || l.CallType != "Male" {
			t.Errorf("target label: got %+v, want cert=90 calltype=Male", l)
		}
	})
	t.Run("file_b_skipped", func(t *testing.T) {
		bDf := readFile(t, bPath)
		if bDf.Meta.Reviewer != "David" {
			t.Errorf("reviewer should not be touched, got %q", bDf.Meta.Reviewer)
		}
	})
	t.Run("file_d_no_overlap", func(t *testing.T) {
		dDf := readFile(t, dPath)
		if dDf.Meta.Reviewer != "David" {
			t.Errorf("reviewer should not be touched, got %q", dDf.Meta.Reviewer)
		}
		if l := findLabel(dDf, fTo, 500, 525); l == nil || l.Certainty != 70 {
			t.Errorf("target label should be unchanged cert=70, got %+v", l)
		}
	})
}

func TestPropagateFolder_EmptyFolder(t *testing.T) {
	dir := t.TempDir()
	out, err := CallsPropagateFolder(CallsPropagateFolderInput{
		Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.FilesTotal != 0 || out.Propagated != 0 {
		t.Errorf("expected empty result, got %+v", out)
	}
}

func TestPropagateFolder_MissingRequiredFlags(t *testing.T) {
	dir := t.TempDir()
	cases := []CallsPropagateFolderInput{
		{Folder: "", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"},
		{Folder: dir, FromFilter: "", ToFilter: fTo, Species: "Kiwi"},
		{Folder: dir, FromFilter: fFrom, ToFilter: "", Species: "Kiwi"},
		{Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: ""},
		{Folder: dir, FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi"},
	}
	for i, in := range cases {
		if _, err := CallsPropagateFolder(in); err == nil {
			t.Errorf("case %d: expected error for input %+v", i, in)
		}
	}
}

func TestPropagateFolder_NonexistentFolder(t *testing.T) {
	_, err := CallsPropagateFolder(CallsPropagateFolderInput{
		Folder: "/nonexistent/path/xyz", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err == nil {
		t.Fatal("expected error for nonexistent folder")
	}
}

func TestPropagateFolder_ConflictsTaggedWithFile(t *testing.T) {
	dir := t.TempDir()
	// Two sources with different calltypes both overlapping one target.
	writeFileAt(t, dir, "conflict.wav.data",
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(110, 130, lbl(fFrom, "Kiwi", "Female", 100)),
		seg(100, 130, lbl(fTo, "Kiwi", "", 70)),
	)
	out, err := CallsPropagateFolder(CallsPropagateFolderInput{
		Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.SkippedConflict != 1 || len(out.Conflicts) != 1 {
		t.Fatalf("expected one conflict, got %+v", out)
	}
	if out.Conflicts[0].File == "" {
		t.Errorf("conflict should be tagged with file path, got %+v", out.Conflicts[0])
	}
}
package calls

import (
	"fmt"
	"os"

	"skraak/utils"
)

type CallsPropagateInput struct {
	File string `json:"file"`
	FromFilter string `json:"from_filter"`
	ToFilter string `json:"to_filter"`
	Species string `json:"species"`
}

type CallsPropagateOutput struct {
	File string `json:"file"`
	FromFilter string `json:"from_filter"`
	ToFilter string `json:"to_filter"`
	Species string `json:"species"`
	FiltersMissing bool `json:"filters_missing,omitempty"`
	TargetsExamined int `json:"targets_examined"`
	Propagated int `json:"propagated"`
	SkippedNoOverlap int `json:"skipped_no_overlap"`
	SkippedConflict int `json:"skipped_conflict"`
	Conflicts []PropagateConflict `json:"conflicts,omitempty"`
	Changes []PropagateChange `json:"changes,omitempty"`
	Error string `json:"error,omitempty"`
}

type CallsPropagateFolderInput struct {
	Folder string `json:"folder"`
	FromFilter string `json:"from_filter"`
	ToFilter string `json:"to_filter"`
	Species string `json:"species"`
}

type CallsPropagateFolderOutput struct {
	Folder string `json:"folder"`
	FromFilter string `json:"from_filter"`
	ToFilter string `json:"to_filter"`
	Species string `json:"species"`
	FilesTotal int `json:"files_total"`
	FilesWithBothFilters int `json:"files_with_both_filters"`
	FilesSkippedNoFilter int `json:"files_skipped_no_filter"`
	FilesChanged int `json:"files_changed"`
	FilesErrored int `json:"files_errored"`
	TargetsExamined int `json:"targets_examined"`
	Propagated int `json:"propagated"`
	SkippedNoOverlap int `json:"skipped_no_overlap"`
	SkippedConflict int `json:"skipped_conflict"`
	Conflicts []PropagateConflict `json:"conflicts,omitempty"`
	Errors []CallsPropagateOutput `json:"errors,omitempty"`
	Error string `json:"error,omitempty"`
}

type PropagateConflict struct {
	File string `json:"file,omitempty"`
	TargetStart float64 `json:"target_start"`
	TargetEnd float64 `json:"target_end"`
	TargetCallType string `json:"target_calltype,omitempty"`
	SourceChoices []PropagateSourceChoice `json:"source_choices"`
}

type PropagateSourceChoice struct {
	Start float64 `json:"start"`
	End float64 `json:"end"`
	Species string `json:"species"`
	CallType string `json:"calltype,omitempty"`
}

type PropagateChange struct {
	TargetStart float64 `json:"target_start"`
	TargetEnd float64 `json:"target_end"`
	PrevSpecies string `json:"prev_species"`
	PrevCallType string `json:"prev_calltype,omitempty"`
	PrevCertainty int `json:"prev_certainty"`
	NewSpecies string `json:"new_species"`
	NewCallType string `json:"new_calltype,omitempty"`
	NewCertainty int `json:"new_certainty"`
}

// CallsPropagate copies verified classifications (certainty==100) from one filter's
// segments to overlapping target segments of another filter, within a single .data file.
// Target labels with certainty==70 (ML-unverified) or certainty==0 (Don't Know / Noise)
// are updated; targets at certainty==100 (human-verified) and certainty==90 (already
// propagated) are left alone. Only source labels matching --species are considered.
// Propagated target labels are set to certainty=90 and file reviewer is set to "Skraak".
func CallsPropagate(input CallsPropagateInput) (CallsPropagateOutput, error) {
	output := CallsPropagateOutput{
		File: input.File,
		FromFilter: input.FromFilter,
		ToFilter: input.ToFilter,
		Species: input.Species,
	}
	if err := validatePropagateInput(&output, input); err != nil {
		return output, err
	}

	df, err := utils.ParseDataFile(input.File)
	if err != nil {
		output.Error = fmt.Sprintf("parse %s: %v", input.File, err)
		return output, fmt.Errorf("%s", output.Error)
	}

	// Fast path: skip files that don't contain both filters at all.
	if !hasBothFilters(df, input.FromFilter, input.ToFilter) {
		output.FiltersMissing = true
		return output, nil
	}

	sources := collectPropagateSources(df, input.FromFilter, input.Species)
	propagateTargets(df, sources, input, &output)

	if output.Propagated > 0 {
		df.Meta.Reviewer = "Skraak"
		if err := df.Write(input.File); err != nil {
			output.Error = fmt.Sprintf("write %s: %v", input.File, err)
			return output, fmt.Errorf("%s", output.Error)
		}
	}
	return output, nil
}

// validatePropagateInput checks required fields and file existence
func validatePropagateInput(output *CallsPropagateOutput, input CallsPropagateInput) error {
	checks := []struct {
		val string
		msg string
	}{
		{input.File, "--file is required"},
		{input.FromFilter, "--from is required"},
		{input.ToFilter, "--to is required"},
		{input.Species, "--species is required"},
	}
	for _, c := range checks {
		if c.val == "" {
			output.Error = c.msg
			return fmt.Errorf("%s", c.msg)
		}
	}
	if input.FromFilter == input.ToFilter {
		output.Error = "--from and --to must differ"
		return fmt.Errorf("%s", output.Error)
	}
	if _, err := os.Stat(input.File); os.IsNotExist(err) {
		output.Error = fmt.Sprintf("file not found: %s", input.File)
		return fmt.Errorf("%s", output.Error)
	}
	return nil
}

// hasBothFilters checks whether the data file contains both from and to filters
func hasBothFilters(df *utils.DataFile, fromFilter, toFilter string) bool {
	hasFrom, hasTo := false, false
	for _, seg := range df.Segments {
		for _, lbl := range seg.Labels {
			if lbl.Filter == fromFilter {
				hasFrom = true
			}
			if lbl.Filter == toFilter {
				hasTo = true
			}
			if hasFrom && hasTo {
				return true
			}
		}
	}
	return false
}

// sourceRef pairs a segment with its matching source label
type sourceRef struct {
	seg *utils.Segment
	label *utils.Label
}

// collectPropagateSources gathers verified source labels (certainty==100) for the given filter/species
func collectPropagateSources(df *utils.DataFile, fromFilter, species string) []sourceRef {
	var sources []sourceRef
	for _, seg := range df.Segments {
		for _, lbl := range seg.Labels {
			if lbl.Filter == fromFilter && lbl.Species == species && lbl.Certainty == 100 {
				sources = append(sources, sourceRef{seg: seg, label: lbl})
				break
			}
		}
	}
	return sources
}

// propagateTargets iterates target segments, finds overlapping sources, and applies agreed classifications
func propagateTargets(df *utils.DataFile, sources []sourceRef, input CallsPropagateInput, output *CallsPropagateOutput) {
	for _, tSeg := range df.Segments {
		toLabel := findUpdatableTargetLabel(tSeg.Labels, input.ToFilter)
		if toLabel == nil {
			continue
		}
		output.TargetsExamined++
		overlaps := findOverlappingSources(sources, tSeg)
		if len(overlaps) == 0 {
			output.SkippedNoOverlap++
			continue
		}
		agreedCallType, conflict := resolveCallType(overlaps)
		if conflict {
			output.SkippedConflict++
			output.Conflicts = append(output.Conflicts, buildConflictRecord(tSeg, toLabel, overlaps))
			continue
		}
		applyPropagation(toLabel, input.Species, agreedCallType, tSeg, output)
	}
}

// findUpdatableTargetLabel finds a target label with certainty 70 or 0 for the given filter
func findUpdatableTargetLabel(labels []*utils.Label, toFilter string) *utils.Label {
	for _, lbl := range labels {
		if lbl.Filter == toFilter && (lbl.Certainty == 70 || lbl.Certainty == 0) {
			return lbl
		}
	}
	return nil
}

// findOverlappingSources returns sources whose segments overlap with the target segment
func findOverlappingSources(sources []sourceRef, tSeg *utils.Segment) []sourceRef {
	var overlaps []sourceRef
	for _, s := range sources {
		if s.seg.StartTime < tSeg.EndTime && tSeg.StartTime < s.seg.EndTime {
			overlaps = append(overlaps, s)
		}
	}
	return overlaps
}

// resolveCallType checks if all overlapping sources agree on a call type.
// Returns the agreed call type and whether there is a conflict.
func resolveCallType(overlaps []sourceRef) (string, bool) {
	agreedCallType := overlaps[0].label.CallType
	for _, s := range overlaps[1:] {
		if s.label.CallType != agreedCallType {
			return "", true
		}
	}
	return agreedCallType, false
}

// buildConflictRecord creates a PropagateConflict from overlapping disagreeing sources
func buildConflictRecord(tSeg *utils.Segment, toLabel *utils.Label, overlaps []sourceRef) PropagateConflict {
	choices := make([]PropagateSourceChoice, 0, len(overlaps))
	for _, s := range overlaps {
		choices = append(choices, PropagateSourceChoice{
			Start: s.seg.StartTime,
			End: s.seg.EndTime,
			Species: s.label.Species,
			CallType: s.label.CallType,
		})
	}
	return PropagateConflict{
		TargetStart: tSeg.StartTime,
		TargetEnd: tSeg.EndTime,
		TargetCallType: toLabel.CallType,
		SourceChoices: choices,
	}
}

// applyPropagation updates the target label and records the change
func applyPropagation(toLabel *utils.Label, species, callType string, tSeg *utils.Segment, output *CallsPropagateOutput) {
	change := PropagateChange{
		TargetStart: tSeg.StartTime,
		TargetEnd: tSeg.EndTime,
		PrevSpecies: toLabel.Species,
		PrevCallType: toLabel.CallType,
		PrevCertainty: toLabel.Certainty,
		NewSpecies: species,
		NewCallType: callType,
		NewCertainty: 90,
	}
	toLabel.Species = species
	toLabel.CallType = callType
	toLabel.Certainty = 90
	output.Propagated++
	output.Changes = append(output.Changes, change)
}

// CallsPropagateFolder runs CallsPropagate against every .data file in a folder,
// aggregating counts. Files that do not contain both --from and --to filters are
// skipped silently (counted as files_skipped_no_filter). Parse/write errors on
// individual files are collected in Errors; they don't abort the run.
func CallsPropagateFolder(input CallsPropagateFolderInput) (CallsPropagateFolderOutput, error) {
	output := CallsPropagateFolderOutput{
		Folder: input.Folder,
		FromFilter: input.FromFilter,
		ToFilter: input.ToFilter,
		Species: input.Species,
	}
	if input.Folder == "" {
		output.Error = "--folder is required"
		return output, fmt.Errorf("%s", output.Error)
	}
	if input.FromFilter == "" {
		output.Error = "--from is required"
		return output, fmt.Errorf("%s", output.Error)
	}
	if input.ToFilter == "" {
		output.Error = "--to is required"
		return output, fmt.Errorf("%s", output.Error)
	}
	if input.Species == "" {
		output.Error = "--species is required"
		return output, fmt.Errorf("%s", output.Error)
	}
	if input.FromFilter == input.ToFilter {
		output.Error = "--from and --to must differ"
		return output, fmt.Errorf("%s", output.Error)
	}

	info, err := os.Stat(input.Folder)
	if err != nil {
		output.Error = fmt.Sprintf("folder not found: %s", input.Folder)
		return output, fmt.Errorf("%s", output.Error)
	}
	if !info.IsDir() {
		output.Error = fmt.Sprintf("not a directory: %s", input.Folder)
		return output, fmt.Errorf("%s", output.Error)
	}

	files, err := utils.FindDataFiles(input.Folder)
	if err != nil {
		output.Error = fmt.Sprintf("list .data files: %v", err)
		return output, fmt.Errorf("%s", output.Error)
	}
	output.FilesTotal = len(files)

	for _, f := range files {
		fileOut, err := CallsPropagate(CallsPropagateInput{
			File: f,
			FromFilter: input.FromFilter,
			ToFilter: input.ToFilter,
			Species: input.Species,
		})
		if err != nil {
			output.FilesErrored++
			output.Errors = append(output.Errors, fileOut)
			continue
		}
		if fileOut.FiltersMissing {
			output.FilesSkippedNoFilter++
			continue
		}
		output.FilesWithBothFilters++
		output.TargetsExamined += fileOut.TargetsExamined
		output.Propagated += fileOut.Propagated
		output.SkippedNoOverlap += fileOut.SkippedNoOverlap
		output.SkippedConflict += fileOut.SkippedConflict
		if fileOut.Propagated > 0 {
			output.FilesChanged++
		}
		for _, c := range fileOut.Conflicts {
			c.File = f
			output.Conflicts = append(output.Conflicts, c)
		}
	}
	return output, nil
}
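// Worked example (illustrative, not from the original source) of the half-open
// overlap test used by findOverlappingSources: intervals [a0,a1) and [b0,b1)
// overlap iff a0 < b1 && b0 < a1, so segments that merely touch do not count.
//
//	// source 100-125 vs target 125-150: 100 < 150, but 125 < 125 is false → no overlap
//	// source 100-126 vs target 125-150: 100 < 150 && 125 < 126 → overlap
//
// This is the behaviour pinned down by TestPropagate_OverlapBoundaryExclusive
// and TestPropagate_OverlapPartial above.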
package calls

import (
	"path/filepath"
	"testing"

	"skraak/utils"
)

func TestCallsModifyBookmark(t *testing.T) {
	// Create a temp .data file with a bookmarked segment
	tmpDir := t.TempDir()
	tmpFile := filepath.Join(tmpDir, "test.data")
	df := &utils.DataFile{
		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{{
			StartTime: 10.0,
			EndTime: 15.0,
			FreqLow: 100,
			FreqHigh: 5000,
			Labels: []*utils.Label{
				{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: true},
			},
		}},
	}
	if err := df.Write(tmpFile); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Test 1: Adding bookmark when already true should do nothing
	bookmark := true
	result, err := CallsModify(CallsModifyInput{
		File: tmpFile,
		Reviewer: "tester",
		Filter: "myfilter",
		Segment: "10-15",
		Certainty: 80,
		Bookmark: &bookmark,
	})
	// Should return error "no changes needed"
	if err == nil {
		t.Errorf("expected error 'no changes needed' when bookmark already true, got nil")
	}
	if result.Error != "No changes needed: all values already match" {
		t.Errorf("expected 'no changes needed' error, got: %s", result.Error)
	}

	// Verify bookmark is still true in the file
	df2, err := utils.ParseDataFile(tmpFile)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if !df2.Segments[0].Labels[0].Bookmark {
		t.Errorf("bookmark should still be true, got false")
	}
}

func TestCallsModifyBookmarkFalse(t *testing.T) {
	// Create a temp .data file WITHOUT a bookmark
	tmpDir := t.TempDir()
	tmpFile := filepath.Join(tmpDir, "test.data")
	df := &utils.DataFile{
		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{{
			StartTime: 10.0,
			EndTime: 15.0,
			FreqLow: 100,
			FreqHigh: 5000,
			Labels: []*utils.Label{
				{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: false},
			},
		}},
	}
	if err := df.Write(tmpFile); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Test: Adding bookmark when false should set it to true
	bookmark := true
	result, err := CallsModify(CallsModifyInput{
		File: tmpFile,
		Reviewer: "tester",
		Filter: "myfilter",
		Segment: "10-15",
		Certainty: 80,
		Bookmark: &bookmark,
	})
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if result.Bookmark == nil || !*result.Bookmark {
		t.Errorf("expected bookmark=true in result, got %v", result.Bookmark)
	}

	// Verify bookmark is true in the file
	df2, err := utils.ParseDataFile(tmpFile)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if !df2.Segments[0].Labels[0].Bookmark {
		t.Errorf("bookmark should be true, got false")
	}
}

func TestCallsModifyCommentAdditive(t *testing.T) {
	// Create a temp .data file with an existing comment
	tmpDir := t.TempDir()
	tmpFile := filepath.Join(tmpDir, "test.data")
	df := &utils.DataFile{
		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{{
			StartTime: 10.0,
			EndTime: 15.0,
			FreqLow: 100,
			FreqHigh: 5000,
			Labels: []*utils.Label{
				{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: "First observation"},
			},
		}},
	}
	if err := df.Write(tmpFile); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Test: Adding comment should be additive
	result, err := CallsModify(CallsModifyInput{
		File: tmpFile,
		Reviewer: "tester",
		Filter: "myfilter",
		Segment: "10-15",
		Certainty: 80,
		Comment: "Good example",
	})
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	expectedComment := "First observation | Good example"
	if result.Comment != expectedComment {
		t.Errorf("expected comment=%q, got %q", expectedComment, result.Comment)
	}

	// Verify comment in file
	df2, err := utils.ParseDataFile(tmpFile)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if df2.Segments[0].Labels[0].Comment != expectedComment {
		t.Errorf("expected comment in file=%q, got %q", expectedComment, df2.Segments[0].Labels[0].Comment)
	}
}

func TestCallsModifyCommentAdditiveMultiple(t *testing.T) {
	// Create a temp .data file and add multiple comments
	tmpDir := t.TempDir()
	tmpFile := filepath.Join(tmpDir, "test.data")
	df := &utils.DataFile{
		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{{
			StartTime: 10.0,
			EndTime: 15.0,
			FreqLow: 100,
			FreqHigh: 5000,
			Labels: []*utils.Label{
				{Species: "Kiwi", Certainty: 80, Filter: "myfilter"},
			},
		}},
	}
	if err := df.Write(tmpFile); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Add first comment
	_, err := CallsModify(CallsModifyInput{
		File: tmpFile,
		Reviewer: "tester",
		Filter: "myfilter",
		Segment: "10-15",
		Certainty: 80,
		Comment: "First",
	})
	if err != nil {
		t.Fatalf("unexpected error on first comment: %v", err)
	}

	// Add second comment
	_, err = CallsModify(CallsModifyInput{
		File: tmpFile,
		Reviewer: "tester",
		Filter: "myfilter",
		Segment: "10-15",
		Certainty: 80,
		Comment: "Second",
	})
	if err != nil {
		t.Fatalf("unexpected error on second comment: %v", err)
	}

	// Add third comment
	result, err := CallsModify(CallsModifyInput{
		File: tmpFile,
		Reviewer: "tester",
		Filter: "myfilter",
		Segment: "10-15",
		Certainty: 80,
		Comment: "Third",
	})
	if err != nil {
		t.Fatalf("unexpected error on third comment: %v", err)
	}
	expectedComment := "First | Second | Third"
	if result.Comment != expectedComment {
		t.Errorf("expected comment=%q, got %q", expectedComment, result.Comment)
	}
}

func TestCallsModifyCommentTooLong(t *testing.T) {
	// Create a temp .data file with an existing long comment
	tmpDir := t.TempDir()
	tmpFile := filepath.Join(tmpDir, "test.data")
	existingComment := "This is a fairly long existing comment that takes up space"
	df := &utils.DataFile{
		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{{
			StartTime: 10.0,
			EndTime: 15.0,
			FreqLow: 100,
			FreqHigh: 5000,
			Labels: []*utils.Label{
				{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: existingComment},
			},
		}},
	}
	if err := df.Write(tmpFile); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Test: Adding a long comment that would exceed 140 chars should fail
	longNewComment := "This is another very long comment that when combined with the existing one will exceed the limit"
	result, err := CallsModify(CallsModifyInput{
		File: tmpFile,
		Reviewer: "tester",
		Filter: "myfilter",
		Segment: "10-15",
		Certainty: 80,
		Comment: longNewComment,
	})
	if err == nil {
		t.Errorf("expected error for combined comment exceeding 140 chars, got nil")
	}
	if result.Error == "" {
		t.Errorf("expected error message, got empty")
	}

	// Verify original comment is preserved
	df2, err := utils.ParseDataFile(tmpFile)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if df2.Segments[0].Labels[0].Comment != existingComment {
		t.Errorf("original comment should be preserved, got %q", df2.Segments[0].Labels[0].Comment)
	}
}

func TestCallsModifyPreservesBookmarkOnOtherChange(t *testing.T) {
	// Create a temp .data file with a bookmark
	tmpDir := t.TempDir()
	tmpFile := filepath.Join(tmpDir, "test.data")
	df := &utils.DataFile{
		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{{
			StartTime: 10.0,
			EndTime: 15.0,
			FreqLow: 100,
			FreqHigh: 5000,
			Labels: []*utils.Label{
				{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Bookmark: true},
			},
		}},
	}
	if err := df.Write(tmpFile); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Change certainty (without passing --bookmark) - bookmark should be preserved
	result, err := CallsModify(CallsModifyInput{
		File: tmpFile,
		Reviewer: "tester",
		Filter: "myfilter",
		Segment: "10-15",
		Certainty: 100,
		// No Bookmark set
	})
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if result.Bookmark != nil {
		t.Errorf("bookmark should not be in output when not changed, got %v", result.Bookmark)
	}

	// Verify bookmark is still true in the file
	df2, err := utils.ParseDataFile(tmpFile)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if !df2.Segments[0].Labels[0].Bookmark {
		t.Errorf("bookmark should still be true after changing certainty, got false")
	}
}

func TestCallsModifyInvalidSegment(t *testing.T) {
	tmpDir := t.TempDir()
	tmpFile := filepath.Join(tmpDir, "test.data")
	df := &utils.DataFile{
		Meta: &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{{
			StartTime: 10.0,
			EndTime: 15.0,
			FreqLow: 100,
			FreqHigh: 5000,
			Labels: []*utils.Label{
				{Species: "Kiwi", Certainty: 80, Filter: "myfilter"},
			},
		}},
	}
	if err := df.Write(tmpFile); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Test: Non-existent segment should error
	result, err := CallsModify(CallsModifyInput{
		File: tmpFile,
		Reviewer: "tester",
		Filter: "myfilter",
		Segment: "99-100",
		Certainty: 80,
	})
	if err == nil {
		t.Errorf("expected error for non-existent segment, got nil")
	}
	if result.Error == "" {
		t.Errorf("expected error message, got empty")
	}
}
package calls

import (
	"fmt"
	"math"
	"os"
	"strings"

	"skraak/utils"
)

// CallsModifyInput defines the input for the modify tool
type CallsModifyInput struct {
	File      string `json:"file"`
	Reviewer  string `json:"reviewer"`
	Filter    string `json:"filter"`
	Segment   string `json:"segment"`
	Certainty int    `json:"certainty"`
	Species   string `json:"species"`
	Bookmark  *bool  `json:"bookmark"`
	Comment   string `json:"comment"`
}

// CallsModifyOutput defines the output for the modify tool
type CallsModifyOutput struct {
	File          string `json:"file"`
	SegmentStart  int    `json:"segment_start"`
	SegmentEnd    int    `json:"segment_end"`
	Species       string `json:"species,omitempty"`
	CallType      string `json:"calltype,omitempty"`
	Certainty     int    `json:"certainty,omitempty"`
	Bookmark      *bool  `json:"bookmark,omitempty"`
	Comment       string `json:"comment,omitempty"`
	PreviousValue string `json:"previous_value,omitempty"`
	Error         string `json:"error,omitempty"`
}

// validateModifyInput checks required fields and comment constraints.
func validateModifyInput(input CallsModifyInput) error {
	if input.File == "" {
		return fmt.Errorf("--file is required")
	}
	if input.Reviewer == "" {
		return fmt.Errorf("--reviewer is required")
	}
	if input.Filter == "" {
		return fmt.Errorf("--filter is required")
	}
	if input.Segment == "" {
		return fmt.Errorf("--segment is required")
	}
	if len(input.Comment) > 140 {
		return fmt.Errorf("--comment must be 140 characters or less")
	}
	for i, r := range input.Comment {
		if r > 127 {
			return fmt.Errorf("--comment must be ASCII only (non-ASCII at position %d)", i)
		}
	}
	return nil
}

// resolveSpecies parses species+calltype from the input species string.
// If input species is empty, keeps the existing label values.
func resolveSpecies(inputSpecies string, label *utils.Label) (species, callType string) {
	if inputSpecies == "" {
		return label.Species, label.CallType
	}
	if before, after, ok := strings.Cut(inputSpecies, "+"); ok {
		return before, after
	}
	return inputSpecies, ""
}

// hasModifyChanges checks whether any field would actually change.
func hasModifyChanges(newSpecies, newCallType string, input CallsModifyInput, label *utils.Label) bool {
	if newSpecies != label.Species || newCallType != label.CallType {
		return true
	}
	if input.Certainty != label.Certainty {
		return true
	}
	if input.Bookmark != nil && *input.Bookmark != label.Bookmark {
		return true
	}
	if input.Comment != "" {
		return true
	}
	return false
}

// applyLabelChanges updates the label and data file, populating the output.
func applyLabelChanges(label *utils.Label, dataFile *utils.DataFile, input CallsModifyInput, newSpecies, newCallType string, output *CallsModifyOutput) error {
	dataFile.Meta.Reviewer = input.Reviewer
	label.Species = newSpecies
	label.CallType = newCallType
	output.Species = newSpecies
	output.CallType = newCallType
	label.Certainty = input.Certainty
	output.Certainty = input.Certainty
	if input.Bookmark != nil && *input.Bookmark != label.Bookmark {
		label.Bookmark = *input.Bookmark
		output.Bookmark = input.Bookmark
	}
	if input.Comment != "" {
		var newComment string
		if label.Comment != "" {
			newComment = label.Comment + " | " + input.Comment
		} else {
			newComment = input.Comment
		}
		if len(newComment) > 140 {
			return fmt.Errorf("combined comment exceeds 140 characters (%d)", len(newComment))
		}
		label.Comment = newComment
		output.Comment = newComment
	}
	return nil
}

// CallsModify modifies a label in a .data file
func CallsModify(input CallsModifyInput) (CallsModifyOutput, error) {
	var output CallsModifyOutput
	if err := validateModifyInput(input); err != nil {
		output.Error = err.Error()
		return output, err
	}
	startTime, endTime, err := parseSegmentRange(input.Segment)
	if err != nil {
		output.Error = err.Error()
		return output, err
	}
	output.File = input.File
	output.SegmentStart = startTime
	output.SegmentEnd = endTime
	if _, err := os.Stat(input.File); os.IsNotExist(err) {
		output.Error = fmt.Sprintf("File not found: %s", input.File)
		return output, fmt.Errorf("%s", output.Error)
	}
	dataFile, err := utils.ParseDataFile(input.File)
	if err != nil {
		output.Error = fmt.Sprintf("Failed to parse file: %v", err)
		return output, fmt.Errorf("%s", output.Error)
	}
	segment := findSegment(dataFile.Segments, startTime, endTime, input.Filter)
	if segment == nil {
		output.Error = fmt.Sprintf("No segment found matching time range %d-%d", startTime, endTime)
		return output, fmt.Errorf("%s", output.Error)
	}
	targetLabel := findLabelByFilter(segment, input.Filter)
	if targetLabel == nil {
		output.Error = fmt.Sprintf("No label found with filter '%s' in segment %d-%d", input.Filter, startTime, endTime)
		return output, fmt.Errorf("%s", output.Error)
	}
	output.PreviousValue = formatLabel(targetLabel)
	newSpecies, newCallType := resolveSpecies(input.Species, targetLabel)
	if !hasModifyChanges(newSpecies, newCallType, input, targetLabel) {
		output.Error = "No changes needed: all values already match"
		return output, fmt.Errorf("%s", output.Error)
	}
	if err := applyLabelChanges(targetLabel, dataFile, input, newSpecies, newCallType, &output); err != nil {
		output.Error = err.Error()
		return output, err
	}
	if err := dataFile.Write(input.File); err != nil {
		output.Error = fmt.Sprintf("Failed to save file: %v", err)
		return output, fmt.Errorf("%s", output.Error)
	}
	return output, nil
}

// findLabelByFilter finds the first label matching the given filter in a segment.
func findLabelByFilter(segment *utils.Segment, filter string) *utils.Label {
	for _, label := range segment.Labels {
		if label.Filter == filter {
			return label
		}
	}
	return nil
}

// parseSegmentRange parses "12-15" format into start and end integers
func parseSegmentRange(s string) (int, int, error) {
	parts := strings.Split(s, "-")
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("invalid segment format: %s (expected start-end, e.g., 12-15)", s)
	}
	var start, end int
	if _, err := fmt.Sscanf(parts[0], "%d", &start); err != nil {
		return 0, 0, fmt.Errorf("invalid start time: %s", parts[0])
	}
	if _, err := fmt.Sscanf(parts[1], "%d", &end); err != nil {
		return 0, 0, fmt.Errorf("invalid end time: %s", parts[1])
	}
	if start < 0 || end < 0 {
		return 0, 0, fmt.Errorf("times must be non-negative")
	}
	if start >= end {
		return 0, 0, fmt.Errorf("start time must be less than end time")
	}
	return start, end, nil
}

// findSegment finds a segment matching the time range using floor/ceil matching.
// It also checks that the segment contains a label with the specified filter,
// so that duplicate segments (same time range, different filters) are resolved correctly.
func findSegment(segments []*utils.Segment, startTime, endTime int, filter string) *utils.Segment {
	for _, seg := range segments {
		segStart := int(math.Floor(seg.StartTime))
		segEnd := int(math.Ceil(seg.EndTime))
		if segEnd == segStart {
			segEnd = segStart + 1 // minimum 1 second
		}
		if segStart == startTime && segEnd == endTime {
			for _, label := range seg.Labels {
				if label.Filter == filter {
					return seg
				}
			}
		}
	}
	return nil
}

// formatLabel formats a label for display
func formatLabel(label *utils.Label) string {
	result := label.Species
	if label.CallType != "" {
		result += "+" + label.CallType
	}
	result += fmt.Sprintf(" (%d%%)", label.Certainty)
	return result
}
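// Illustrative sketch (not part of the original source): one way CallsModify
// might be invoked to bump certainty and bookmark the 10-15s segment labelled
// by a filter. The file path and field values here are assumptions, shown only
// to make the input/output shapes above concrete.
package calls

import "fmt"

func ExampleCallsModify() {
	bookmark := true
	out, err := CallsModify(CallsModifyInput{
		File:      "20230610_150000.wav.data", // hypothetical path
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 100,
		Bookmark:  &bookmark,
	})
	if err != nil {
		fmt.Println("modify failed:", err)
		return
	}
	// PreviousValue records the label as it was before the change, e.g. "Kiwi (80%)".
	fmt.Println("previous value:", out.PreviousValue)
}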
package calls

import (
	"bufio"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"skraak/utils"
)

// CallsFromRavenInput defines the input for the calls-from-raven tool
type CallsFromRavenInput struct {
	Folder          string          `json:"folder"`
	File            string          `json:"file"`
	Delete          bool            `json:"delete"`
	ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
}

// CallsFromRavenOutput defines the output for the calls-from-raven tool
type CallsFromRavenOutput struct {
	Calls            []ClusteredCall `json:"calls"`
	TotalCalls       int             `json:"total_calls"`
	SpeciesCount     map[string]int  `json:"species_count"`
	DataFilesWritten int             `json:"data_files_written"`
	DataFilesSkipped int             `json:"data_files_skipped"`
	FilesProcessed   int             `json:"files_processed"`
	FilesDeleted     int             `json:"files_deleted"`
	Filter           string          `json:"filter"`
	Error            *string         `json:"error,omitempty"`
}

// ravenSource implements CallSource for Raven selection files
type ravenSource struct{}

func (ravenSource) Name() string { return "Raven" }

func (ravenSource) FindFiles(folder string) ([]string, error) {
	var files []string
	entries, err := os.ReadDir(folder)
	if err != nil {
		return nil, err
	}
	for _, entry := range entries {
		name := entry.Name()
		if strings.HasSuffix(name, ".selections.txt") {
			files = append(files, filepath.Join(folder, name))
		}
	}
	return files, nil
}

func (ravenSource) ProcessFile(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	return processRavenFileCached(ravenFile, cache)
}

// CallsFromRaven processes Raven selection files and writes .data files
func CallsFromRaven(input CallsFromRavenInput) (CallsFromRavenOutput, error) {
	src := ravenSource{}
	commonInput := CallsFromSourceInput(input)
	commonOutput, err := callsFromSource(src, commonInput)

	// Convert to Raven-specific output type
	var output CallsFromRavenOutput
	output.Calls = commonOutput.Calls
	output.TotalCalls = commonOutput.TotalCalls
	output.SpeciesCount = commonOutput.SpeciesCount
	output.DataFilesWritten = commonOutput.DataFilesWritten
	output.DataFilesSkipped = commonOutput.DataFilesSkipped
	output.FilesProcessed = commonOutput.FilesProcessed
	output.FilesDeleted = commonOutput.FilesDeleted
	output.Filter = commonOutput.Filter
	output.Error = commonOutput.Error
	return output, err
}

// RavenSelection represents a single Raven selection
type RavenSelection struct {
	StartTime float64
	EndTime   float64
	FreqLow   float64
	FreqHigh  float64
	Species   string
}

// ravenColumnIndices holds the column index positions for a Raven file
type ravenColumnIndices struct {
	beginTimeIdx int
	endTimeIdx   int
	lowFreqIdx   int
	highFreqIdx  int
	speciesIdx   int
}

// parseRavenHeader finds column indices from a tab-separated header line
func parseRavenHeader(header []string) (ravenColumnIndices, error) {
	idx := ravenColumnIndices{beginTimeIdx: -1, endTimeIdx: -1, lowFreqIdx: -1, highFreqIdx: -1, speciesIdx: -1}
	for i, col := range header {
		switch col {
		case "Begin Time (s)":
			idx.beginTimeIdx = i
		case "End Time (s)":
			idx.endTimeIdx = i
		case "Low Freq (Hz)":
			idx.lowFreqIdx = i
		case "High Freq (Hz)":
			idx.highFreqIdx = i
		case "Species":
			idx.speciesIdx = i
		}
	}
	if idx.beginTimeIdx == -1 || idx.endTimeIdx == -1 || idx.speciesIdx == -1 {
		return idx, fmt.Errorf("missing required columns in Raven file")
	}
	return idx, nil
}

// parseRavenSelections reads all selection rows from a scanner and returns parsed selections
func parseRavenSelections(scanner *bufio.Scanner, idx ravenColumnIndices) ([]RavenSelection, error) {
	var selections []RavenSelection
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			continue
		}
		fields := strings.Split(line, "\t")
		if len(fields) <= idx.speciesIdx {
			continue
		}
		sel, err := parseRavenRow(fields, idx)
		if err != nil {
			return nil, err
		}
		selections = append(selections, sel)
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading file: %w", err)
	}
	return selections, nil
}

// parseRavenRow parses a single tab-separated row into a RavenSelection
func parseRavenRow(fields []string, idx ravenColumnIndices) (RavenSelection, error) {
	var sel RavenSelection
	startTime, err := strconv.ParseFloat(fields[idx.beginTimeIdx], 64)
	if err != nil {
		return sel, fmt.Errorf("failed to parse begin time %q: %w", fields[idx.beginTimeIdx], err)
	}
	sel.StartTime = startTime
	endTime, err := strconv.ParseFloat(fields[idx.endTimeIdx], 64)
	if err != nil {
		return sel, fmt.Errorf("failed to parse end time %q: %w", fields[idx.endTimeIdx], err)
	}
	sel.EndTime = endTime
	if idx.lowFreqIdx >= 0 && idx.lowFreqIdx < len(fields) {
		freqLow, err := strconv.ParseFloat(fields[idx.lowFreqIdx], 64)
		if err != nil {
			return sel, fmt.Errorf("failed to parse low freq %q: %w", fields[idx.lowFreqIdx], err)
		}
		sel.FreqLow = freqLow
	}
	if idx.highFreqIdx >= 0 && idx.highFreqIdx < len(fields) {
		freqHigh, err := strconv.ParseFloat(fields[idx.highFreqIdx], 64)
		if err != nil {
			return sel, fmt.Errorf("failed to parse high freq %q: %w", fields[idx.highFreqIdx], err)
		}
		sel.FreqHigh = freqHigh
	}
	sel.Species = fields[idx.speciesIdx]
	return sel, nil
}

// deriveWAVBaseName extracts the base WAV filename from a Raven .selections.txt filename
func deriveWAVBaseName(ravenFile string) string {
	base := filepath.Base(ravenFile)
	nameWithoutSuffix := strings.TrimSuffix(base, ".selections.txt")
	idx := strings.Index(nameWithoutSuffix, ".Table.")
	if idx > 0 {
		nameWithoutSuffix = nameWithoutSuffix[:idx]
	}
	return nameWithoutSuffix
}

// processRavenFileCached processes a single Raven selection file using a DirCache for WAV lookup
func processRavenFileCached(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	file, err := os.Open(ravenFile)
	if err != nil {
		return nil, false, false, fmt.Errorf("failed to open file: %w", err)
	}
	defer func() { _ = file.Close() }()

	scanner := bufio.NewScanner(file)
	if !scanner.Scan() {
		return nil, false, false, fmt.Errorf("empty file")
	}
	header := strings.Split(scanner.Text(), "\t")
	idx, err := parseRavenHeader(header)
	if err != nil {
		return nil, false, false, err
	}
	selections, err := parseRavenSelections(scanner, idx)
	if err != nil {
		return nil, false, false, err
	}
	if len(selections) == 0 {
		return nil, false, true, nil
	}

	// Find WAV file
	wavPath := resolveWAVPath(ravenFile, cache)
	if wavPath == "" {
		return nil, false, true, nil
	}
	sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
	if err != nil {
		return nil, false, true, nil
	}
	dataPath := wavPath + ".data"
	segments := buildRavenSegments(selections, sampleRate)
	meta := AviaNZMeta{Operator: "Raven", Duration: duration}
	reviewer := "None"
	meta.Reviewer = &reviewer
	if err := writeDotDataFileSafe(dataPath, segments, "Raven", meta); err != nil {
		return nil, false, false, err
	}
	var calls []ClusteredCall
	for _, sel := range selections {
		calls = append(calls, ClusteredCall{
			File:      wavPath,
			StartTime: sel.StartTime,
			EndTime:   sel.EndTime,
			EbirdCode: sel.Species,
			Segments:  1,
		})
	}
	return calls, true, false, nil
}

// resolveWAVPath finds the WAV file corresponding to a Raven file
func resolveWAVPath(ravenFile string, cache *DirCache) string {
	baseName := deriveWAVBaseName(ravenFile)
	if cache != nil {
		return cache.FindWAV(baseName)
	}
	return findWAVFile(filepath.Dir(ravenFile), baseName)
}

// buildRavenSegments converts Raven selections to AviaNZ segments
func buildRavenSegments(selections []RavenSelection, sampleRate int) []AviaNZSegment {
	var segments []AviaNZSegment
	for _, sel := range selections {
		labels := []AviaNZLabel{{
			Species:   sel.Species,
			Certainty: 70, // Default certainty for Raven (no confidence metric)
			Filter:    "Raven",
		}}
		// Use frequency range from Raven, or full band if not specified
		freqLow := sel.FreqLow
		freqHigh := sel.FreqHigh
		if freqLow == 0 && freqHigh == 0 {
			freqHigh = float64(sampleRate)
		}
		segment := AviaNZSegment{
			sel.StartTime,
			sel.EndTime,
			freqLow,
			freqHigh,
			labels,
		}
		segments = append(segments, segment)
	}
	return segments
}
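// Hedged sketch (not in the original source): parsing an in-memory Raven
// selection table with parseRavenHeader and parseRavenSelections, to make the
// expected tab-separated layout concrete. The table content is invented for
// illustration only.
package calls

import (
	"bufio"
	"fmt"
	"strings"
)

func ExampleParseRavenTable() {
	table := "Selection\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n" +
		"1\t0.0\t5.0\t1000\t5000\tKiwi\n"
	scanner := bufio.NewScanner(strings.NewReader(table))
	scanner.Scan() // consume the header line
	idx, err := parseRavenHeader(strings.Split(scanner.Text(), "\t"))
	if err != nil {
		fmt.Println(err)
		return
	}
	selections, err := parseRavenSelections(scanner, idx)
	if err != nil {
		fmt.Println(err)
		return
	}
	// Prints: Kiwi 0.0-5.0s
	fmt.Printf("%s %.1f-%.1fs\n", selections[0].Species, selections[0].StartTime, selections[0].EndTime)
}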
package calls

import (
	"os"
	"path/filepath"
	"testing"

	"skraak/utils"
)

func TestCallsFromPreds_EmptyFilterError(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "preds.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file (minimal valid WAV)
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Test with empty filter (should error)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "",
		WriteDotData:    true,
		ProgressHandler: nil,
	}
	output, err := CallsFromPreds(input)

	// Should return error
	if err == nil {
		t.Error("expected error for empty filter, got nil")
	}
	if output.Error == nil || *output.Error == "" {
		t.Error("expected error message in output, got empty")
	}
}

func TestCallsFromPreds_NewDataFile(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_test-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Test with filter parsed from filename
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename
		WriteDotData:    true,
		ProgressHandler: nil,
	}
	output, err := CallsFromPreds(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "test-filter" {
		t.Errorf("expected filter 'test-filter', got '%s'", output.Filter)
	}

	// Verify .data file was created
	dataPath := wavPath + ".data"
	if _, err := os.Stat(dataPath); os.IsNotExist(err) {
		t.Error("expected .data file to be created")
	}

	// Verify content
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 1 {
		t.Errorf("expected 1 segment, got %d", len(df.Segments))
	}
	if len(df.Segments[0].Labels) != 1 {
		t.Errorf("expected 1 label, got %d", len(df.Segments[0].Labels))
	}
	if df.Segments[0].Labels[0].Filter != "test-filter" {
		t.Errorf("expected filter 'test-filter', got '%s'", df.Segments[0].Labels[0].Filter)
	}
}

func TestCallsFromPreds_ExistingDataFileSameFilter(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_existing-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Create existing .data file with same filter
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},[5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "existing-filter"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}

	// Test with same filter (should error)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename -> "existing-filter"
		WriteDotData:    true,
		ProgressHandler: nil,
	}
	output, err := CallsFromPreds(input)

	// Should return error
	if err == nil {
		t.Error("expected error for same filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}

	// Verify original .data file is unchanged
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 1 {
		t.Errorf("expected original 1 segment, got %d", len(df.Segments))
	}
	if df.Segments[0].Labels[0].Species != "morepork" {
		t.Errorf("expected original species 'morepork', got '%s'", df.Segments[0].Labels[0].Species)
	}
}

func TestCallsFromPreds_ExistingDataFileDifferentFilter(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_new-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Create existing .data file with different filter
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},[5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "old-filter"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}

	// Test with different filter (should merge)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename -> "new-filter"
		WriteDotData:    true,
		ProgressHandler: nil,
	}
	output, err := CallsFromPreds(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}

	// Verify .data file has merged content
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 2 {
		t.Errorf("expected 2 segments after merge, got %d", len(df.Segments))
	}

	// Check segments are sorted by start time
	if df.Segments[0].StartTime > df.Segments[1].StartTime {
		t.Error("expected segments to be sorted by start time")
	}

	// Check both filters are present
	filters := make(map[string]bool)
	for _, seg := range df.Segments {
		for _, label := range seg.Labels {
			filters[label.Filter] = true
		}
	}
	if !filters["old-filter"] {
		t.Error("expected 'old-filter' to be present")
	}
	if !filters["new-filter"] {
		t.Error("expected 'new-filter' to be present")
	}
}

func TestCallsFromPreds_ExistingDataFileParseError(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_test-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Create corrupted .data file
	dataPath := wavPath + ".data"
	corruptedData := `this is not valid json`
	if err := os.WriteFile(dataPath, []byte(corruptedData), 0644); err != nil {
		t.Fatal(err)
	}

	// Test (should error due to parse failure)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "",
		WriteDotData:    true,
		ProgressHandler: nil,
	}
	output, err := CallsFromPreds(input)

	// Should return error
	if err == nil {
		t.Error("expected error for corrupted .data file, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}

	// Verify original file is unchanged
	content, err := os.ReadFile(dataPath)
	if err != nil {
		t.Fatal(err)
	}
	if string(content) != corruptedData {
		t.Error("expected corrupted file to remain unchanged")
	}
}

func TestCallsFromPreds_ExplicitFilter(t *testing.T) {
	// Create a temp CSV file with non-standard name
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predictions.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Test with explicit filter
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "my-custom-filter",
		WriteDotData:    true,
		ProgressHandler: nil,
	}
	output, err := CallsFromPreds(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.Filter != "my-custom-filter" {
		t.Errorf("expected filter 'my-custom-filter', got '%s'", output.Filter)
	}

	// Verify .data file uses explicit filter
	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if df.Segments[0].Labels[0].Filter != "my-custom-filter" {
		t.Errorf("expected filter 'my-custom-filter' in .data file, got '%s'", df.Segments[0].Labels[0].Filter)
	}
}

func TestCallsFromPreds_NonParsableFilenameNoFilter(t *testing.T) {
	// Create a temp CSV file with non-standard name that can't be parsed
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "random_name.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Test with no filter and non-parsable filename (should error)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "",
		WriteDotData:    true,
		ProgressHandler: nil,
	}
	output, err := CallsFromPreds(input)

	// Should return error
	if err == nil {
		t.Error("expected error for unparsable filename with no filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
}

// createMinimalWAV creates a minimal valid WAV file for testing
func createMinimalWAV(t *testing.T, path string, sampleRate int, duration float64) {
	t.Helper()
	numSamples := int(float64(sampleRate) * duration)
	dataSize := numSamples * 2 // 16-bit mono

	// WAV header (44 bytes)
	header := make([]byte, 44)

	// RIFF header
	copy(header[0:4], "RIFF")
	totalSize := uint32(36 + dataSize)
	header[4] = byte(totalSize)
	header[5] = byte(totalSize >> 8)
	header[6] = byte(totalSize >> 16)
	header[7] = byte(totalSize >> 24)
	copy(header[8:12], "WAVE")

	// fmt chunk
	copy(header[12:16], "fmt ")
	chunkSize := uint32(16)
	header[16] = byte(chunkSize)
	header[17] = byte(chunkSize >> 8)
	header[18] = byte(chunkSize >> 16)
	header[19] = byte(chunkSize >> 24)
	audioFormat := uint16(1) // PCM
	header[20] = byte(audioFormat)
	header[21] = byte(audioFormat >> 8)
	numChannels := uint16(1)
	header[22] = byte(numChannels)
	header[23] = byte(numChannels >> 8)
	header[24] = byte(sampleRate)
	header[25] = byte(sampleRate >> 8)
	header[26] = byte(sampleRate >> 16)
	header[27] = byte(sampleRate >> 24)
	byteRate := uint32(sampleRate * 2)
	header[28] = byte(byteRate)
	header[29] = byte(byteRate >> 8)
	header[30] = byte(byteRate >> 16)
	header[31] = byte(byteRate >> 24)
	blockAlign := uint16(2)
	header[32] = byte(blockAlign)
	header[33] = byte(blockAlign >> 8)
	bitsPerSample := uint16(16)
	header[34] = byte(bitsPerSample)
	header[35] = byte(bitsPerSample >> 8)

	// data chunk
	copy(header[36:40], "data")
	header[40] = byte(dataSize)
	header[41] = byte(dataSize >> 8)
	header[42] = byte(dataSize >> 16)
	header[43] = byte(dataSize >> 24)

	// Create file with header and silence
	file, err := os.Create(path)
	if err != nil {
		t.Fatal(err)
	}
	defer file.Close()
	if _, err := file.Write(header); err != nil {
		t.Fatal(err)
	}

	// Write silence (zeros)
	silence := make([]byte, dataSize)
	if _, err := file.Write(silence); err != nil {
		t.Fatal(err)
	}
}
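// Hedged aside (not in the original source): the manual byte-shifting in
// createMinimalWAV above can also be expressed with encoding/binary, the more
// common idiom for fixed little-endian headers. buildWAVHeader44 is a
// hypothetical helper that packs the same canonical 44-byte PCM header
// (16-bit mono) as the test helper; it is a sketch, not the project's code.
package calls

import "encoding/binary"

func buildWAVHeader44(sampleRate, dataSize int) []byte {
	h := make([]byte, 44)
	copy(h[0:4], "RIFF")
	binary.LittleEndian.PutUint32(h[4:8], uint32(36+dataSize)) // RIFF chunk size
	copy(h[8:12], "WAVE")
	copy(h[12:16], "fmt ")
	binary.LittleEndian.PutUint32(h[16:20], 16) // fmt chunk size
	binary.LittleEndian.PutUint16(h[20:22], 1)  // PCM
	binary.LittleEndian.PutUint16(h[22:24], 1)  // mono
	binary.LittleEndian.PutUint32(h[24:28], uint32(sampleRate))
	binary.LittleEndian.PutUint32(h[28:32], uint32(sampleRate*2)) // byte rate
	binary.LittleEndian.PutUint16(h[32:34], 2)                    // block align
	binary.LittleEndian.PutUint16(h[34:36], 16)                   // bits per sample
	copy(h[36:40], "data")
	binary.LittleEndian.PutUint32(h[40:44], uint32(dataSize))
	return h
}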
package calls

import (
	"encoding/csv"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"

	"skraak/utils"
)

// Constants for the clustering algorithm
const (
	CLUSTER_GAP_MULTIPLIER     = 2  // Gap threshold = CLUSTER_GAP_MULTIPLIER * clip_duration; 3 works well for kiwi
	MIN_DETECTIONS_PER_CLUSTER = 0  // 1 = filter out single detections (useful for kiwi, whose calls run ~30 s); 0 = let single detections pass through
	DEFAULT_CERTAINTY          = 70 // certainty written to .data labels
	DOT_DATA_WORKERS           = 8  // Number of parallel workers for .data file writing
)

// ClusteredCall represents a clustered bird call detection
type ClusteredCall struct {
	File      string  `json:"file"`
	StartTime float64 `json:"start_time"`
	EndTime   float64 `json:"end_time"`
	EbirdCode string  `json:"ebird_code"`
	Segments  int     `json:"segments"`
}

// CallsFromPredsInput defines the input for the calls-from-preds tool
type CallsFromPredsInput struct {
	CSVPath         string          `json:"csv_path"`
	Filter          string          `json:"filter"`
	WriteDotData    bool            `json:"write_dot_data"`
	GapMultiplier   int             `json:"gap_multiplier"`
	MinDetections   int             `json:"min_detections"`
	ProgressHandler ProgressHandler `json:"-"` // Optional progress callback (not serialized)
}

// ProgressHandler is a callback function for reporting progress during long operations.
// processed: number of items processed so far
// total: total number of items to process
// message: optional status message
type ProgressHandler func(processed, total int, message string)

// CallsFromPredsOutput defines the output for the calls-from-preds tool
type CallsFromPredsOutput struct {
	Calls            []ClusteredCall `json:"calls"`
	TotalCalls       int             `json:"total_calls"`
	ClipDuration     float64         `json:"clip_duration"`
	GapThreshold     float64         `json:"gap_threshold"`
	SpeciesCount     map[string]int  `json:"species_count"`
	DataFilesWritten int             `json:"data_files_written"`
	DataFilesSkipped int             `json:"data_files_skipped"`
	Filter           string          `json:"filter"`
	Error            *string         `json:"error,omitempty"`
}

// predFileSpeciesKey groups detections by file and ebird code
type predFileSpeciesKey struct {
	File      string
	EbirdCode string
}

// CallsFromPreds reads a predictions CSV and clusters detections into continuous bird calls
func CallsFromPreds(input CallsFromPredsInput) (CallsFromPredsOutput, error) {
	var output CallsFromPredsOutput

	// Determine filter: use provided filter, or parse from CSV filename
	filter := input.Filter
	if filter == "" {
		filter = ParseFilterFromFilename(input.CSVPath)
	}
	if filter == "" {
		errMsg := "Filter must be specified via --filter flag or parsable from CSV filename"
		output.Error = &errMsg
		return output, fmt.Errorf("%s", errMsg)
	}
	output.Filter = filter

	_, detections, clipDuration, err := readPredCSV(input.CSVPath)
	if err != nil {
		errMsg := err.Error()
		output.Error = &errMsg
		return output, err
	}
	output.ClipDuration = clipDuration

	gapMultiplier := CLUSTER_GAP_MULTIPLIER
	if input.GapMultiplier > 0 {
		gapMultiplier = input.GapMultiplier
	}
	minDetections := MIN_DETECTIONS_PER_CLUSTER
	if input.MinDetections >= 0 {
		minDetections = input.MinDetections
	}
	gapThreshold := float64(gapMultiplier) * clipDuration
	output.GapThreshold = gapThreshold

	allCalls, speciesCount := clusterDetections(detections, clipDuration, gapThreshold, minDetections)
	output.Calls = allCalls
	output.TotalCalls = len(allCalls)
	output.SpeciesCount = speciesCount

	if input.WriteDotData {
		dataFilesWritten, dataFilesSkipped, err := writeDotFiles(input.CSVPath, filter, allCalls, input.ProgressHandler)
		if err != nil {
			errMsg := fmt.Sprintf("Error writing .data files: %v", err)
			output.Error = &errMsg
			return output, fmt.Errorf("%s", errMsg)
		}
		output.DataFilesWritten = dataFilesWritten
		output.DataFilesSkipped = dataFilesSkipped
	}
	return output, nil
}

// readPredCSV opens and reads a predictions CSV, returning column mappings, detections, and clip duration
func readPredCSV(csvPath string) (predCSVColumns, map[predFileSpeciesKey][]float64, float64, error) {
	file, err := os.Open(csvPath)
	if err != nil {
		return predCSVColumns{}, nil, 0, fmt.Errorf("failed to open CSV file: %w", err)
	}
	defer func() { _ = file.Close() }()

	reader := csv.NewReader(file)
	reader.ReuseRecord = true
	header, err := reader.Read()
	if err != nil {
		return predCSVColumns{}, nil, 0, fmt.Errorf("failed to read CSV header: %w", err)
	}
	cols, err := findPredCSVColumns(header)
	if err != nil {
		return predCSVColumns{}, nil, 0, err
	}
	detections, clipDuration, err := readPredCSVRows(reader, cols)
	if err != nil {
		return predCSVColumns{}, nil, 0, err
	}
	return cols, detections, clipDuration, nil
}

// predCSVColumns holds the column indices for a predictions CSV
type predCSVColumns struct {
	fileIdx      int
	startTimeIdx int
	endTimeIdx   int
	ebirdCodes   []string
	ebirdIdx     []int
}

// findPredCSVColumns parses the CSV header to find column indices
func findPredCSVColumns(header []string) (predCSVColumns, error) {
	cols := predCSVColumns{
		fileIdx:      -1,
		startTimeIdx: -1,
		endTimeIdx:   -1,
	}
	ignoredColumns := map[string]bool{"NotKiwi": true, "0.0": true}
	for i, col := range header {
		switch col {
		case "file":
			cols.fileIdx = i
		case "start_time":
			cols.startTimeIdx = i
		case "end_time":
			cols.endTimeIdx = i
		default:
			if ignoredColumns[col] {
				continue
			}
			cols.ebirdCodes = append(cols.ebirdCodes, col)
			cols.ebirdIdx = append(cols.ebirdIdx, i)
		}
	}
	if cols.fileIdx == -1 || cols.startTimeIdx == -1 || cols.endTimeIdx == -1 {
		return cols, fmt.Errorf("CSV must have 'file', 'start_time', and 'end_time' columns")
	}
	if len(cols.ebirdCodes) == 0 {
		return cols, fmt.Errorf("CSV must have at least one ebird code column")
	}
	return cols, nil
}

// readPredCSVRows reads all CSV data rows and returns detections grouped by file+species, plus clip duration
func readPredCSVRows(reader *csv.Reader, cols predCSVColumns) (map[predFileSpeciesKey][]float64, float64, error) {
	detections := make(map[predFileSpeciesKey][]float64)
	clipDuration := 0.0
	record, err := reader.Read()
	if err == io.EOF {
		return detections, 0, nil
	}
	if err != nil {
		return nil, 0, fmt.Errorf("failed to read first CSV row: %w", err)
	}
	startTime, _ := strconv.ParseFloat(record[cols.startTimeIdx], 64)
	endTime, _ := strconv.ParseFloat(record[cols.endTimeIdx], 64)
	clipDuration = endTime - startTime
	addDetectionsFromRow(record, cols, startTime, detections)
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, 0, fmt.Errorf("failed to read CSV row: %w", err)
		}
		startTime, _ = strconv.ParseFloat(record[cols.startTimeIdx], 64)
		addDetectionsFromRow(record, cols, startTime, detections)
	}
	return detections, clipDuration, nil
}

// addDetectionsFromRow adds positive detections from a single CSV row
func addDetectionsFromRow(record []string, cols predCSVColumns, startTime float64, detections map[predFileSpeciesKey][]float64) {
	fileName := record[cols.fileIdx]
	for i, idx := range cols.ebirdIdx {
		if record[idx] == "1" {
			key := predFileSpeciesKey{File: fileName, EbirdCode: cols.ebirdCodes[i]}
			detections[key] = append(detections[key], startTime)
		}
	}
}

// clusterDetections groups detections into clusters and produces sorted ClusteredCalls
func clusterDetections(detections map[predFileSpeciesKey][]float64, clipDuration, gapThreshold float64, minDetections int) ([]ClusteredCall, map[string]int) {
	var allCalls []ClusteredCall
	speciesCount := make(map[string]int)
	for key, startTimes := range detections {
		sort.Float64s(startTimes)
		clusters := clusterStartTimes(startTimes, gapThreshold)
		for _, cluster := range clusters {
			if len(cluster) <= minDetections {
				continue
			}
			call := ClusteredCall{
				File:      key.File,
				StartTime: cluster[0],
				EndTime:   cluster[len(cluster)-1] + clipDuration,
				EbirdCode: key.EbirdCode,
				Segments:  len(cluster),
			}
			allCalls = append(allCalls, call)
			speciesCount[key.EbirdCode]++
		}
	}
	sort.Slice(allCalls, func(i, j int) bool {
		if allCalls[i].File != allCalls[j].File {
			return allCalls[i].File < allCalls[j].File
		}
		return allCalls[i].StartTime < allCalls[j].StartTime
	})
	return allCalls, speciesCount
}

// DirCache caches directory entries for fast WAV file lookup.
// Scans the directory once and builds a map from lowercased basename to full filename.
// Safe for concurrent read-only use after construction.
type DirCache struct {
	dir    string
	wavMap map[string]string // lowercase basename -> filename with original case (e.g. "20230610_150000" -> "20230610_150000.WAV")
	dirMap map[string]string // lowercase basename -> filename for any file (used by from-raven for .selections.txt etc.)
}

// NewDirCache creates a DirCache by scanning the directory once.
func NewDirCache(dir string) *DirCache {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return &DirCache{dir: dir, wavMap: make(map[string]string), dirMap: make(map[string]string)}
	}
	wavMap := make(map[string]string, len(entries))
	dirMap := make(map[string]string, len(entries))
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		ext := filepath.Ext(name)
		base := strings.TrimSuffix(name, ext)
		dirMap[strings.ToLower(base)] = name
		if strings.EqualFold(ext, ".wav") {
			wavMap[strings.ToLower(base)] = name
		}
	}
	return &DirCache{dir: dir, wavMap: wavMap, dirMap: dirMap}
}

// FindWAV looks up a WAV file by basename (case-insensitive).
// Returns the full path with correct case, or empty string if not found.
func (dc *DirCache) FindWAV(baseName string) string {
	if name, ok := dc.wavMap[strings.ToLower(baseName)]; ok {
		return filepath.Join(dc.dir, name)
	}
	return ""
}

// FindFile looks up any file by basename (case-insensitive).
// Returns the full path with correct case, or empty string if not found.
func (dc *DirCache) FindFile(baseName string) string {
	if name, ok := dc.dirMap[strings.ToLower(baseName)]; ok {
		return filepath.Join(dc.dir, name)
	}
	return ""
}

// findWAVFile finds a WAV file in the directory with case-insensitive matching.
// baseName is the filename without extension (e.g., "20230610_150000").
// Returns the full path with correct case, or empty string if not found.
// Deprecated: Use DirCache.FindWAV for batch operations to avoid repeated directory scans.
func findWAVFile(dir, baseName string) string {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return ""
	}
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		ext := filepath.Ext(name)
		nameNoExt := strings.TrimSuffix(name, ext)
		if nameNoExt == baseName && strings.EqualFold(ext, ".wav") {
			return filepath.Join(dir, name)
		}
	}
	return ""
}

// writeDotFiles writes AviaNZ .data files for each audio file with calls.
// Uses parallel workers for improved performance on large batches.
func writeDotFiles(csvPath, filter string, calls []ClusteredCall, progress ProgressHandler) (int, int, error) {
	// Base directory is the directory containing the CSV file
	csvDir := filepath.Dir(csvPath)

	// Group calls by file (using extracted filename)
	callsByFile := make(map[string][]ClusteredCall)
	for _, call := range calls {
		filename := filepath.Base(call.File)
		callsByFile[filename] = append(callsByFile[filename], call)
	}

	// Report initial progress
	if progress != nil {
		progress(0, len(callsByFile), "Processing WAV files")
	}

	// If small batch, process sequentially (avoid goroutine overhead)
	if len(callsByFile) < 10 {
		return writeDotFilesSequential(csvDir, filter, callsByFile, progress)
	}
	// Parallel processing for larger batches
	return writeDotFilesParallel(csvDir, filter, callsByFile, progress)
}

// dotDataJob represents a single file to process
type dotDataJob struct {
	filename  string
	fileCalls []ClusteredCall
}

// dotDataResult represents the result of processing a single file
type dotDataResult struct {
	filename string
	written  bool
	err      error
}

// writeDotFilesSequential processes files one at a time (for small batches)
func writeDotFilesSequential(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
	dataFilesWritten := 0
	dataFilesSkipped := 0
	total := len(callsByFile)
	processed := 0
	for filename, fileCalls := range callsByFile {
		// Find WAV file with correct case
		baseName := strings.TrimSuffix(filename, filepath.Ext(filename))
		wavPath := findWAVFile(csvDir, baseName)
		if wavPath == "" {
			dataFilesSkipped++
			processed++
			if progress != nil {
				progress(processed, total, "")
			}
			continue
		}
		dataPath := wavPath + ".data"
		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
		if err != nil {
			dataFilesSkipped++
			processed++
			if progress != nil {
				progress(processed, total, "")
			}
			continue
		}

		// Build segments and metadata
		meta, segments := buildAviaNZMetaAndSegments(fileCalls, filter, duration, sampleRate)
		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
			return dataFilesWritten, dataFilesSkipped, fmt.Errorf("failed to write %s: %w", dataPath, err)
		}
		dataFilesWritten++
		processed++
		if progress != nil {
			progress(processed, total, "")
		}
	}
	return dataFilesWritten, dataFilesSkipped, nil
}

// writeDotFilesParallel processes files concurrently using a worker pool
func writeDotFilesParallel(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
	total := len(callsByFile)
	var processed atomic.Int32

	// Create job and result channels
	jobs := make(chan dotDataJob, len(callsByFile))
	results := make(chan dotDataResult, len(callsByFile))

	// Start workers
	var wg sync.WaitGroup
	for range DOT_DATA_WORKERS {
		wg.Add(1)
		go dotDataWorker(csvDir, filter, jobs, results, &wg)
	}

	// Send jobs
	for filename, fileCalls := range callsByFile {
		jobs <- dotDataJob{filename: filename, fileCalls: fileCalls}
	}
	close(jobs)

	// Wait for workers to finish
	go func() {
		wg.Wait()
		close(results)
	}()

	// Collect results with progress reporting
	dataFilesWritten := 0
	dataFilesSkipped := 0
	var firstErr error
	for result := range results {
		if result.err != nil && firstErr == nil {
			firstErr = result.err
		}
		if result.written {
			dataFilesWritten++
		} else {
			dataFilesSkipped++
		}

		// Report progress
		if progress != nil {
			current := int(processed.Add(1))
			progress(current, total, "")
		}
	}
	return dataFilesWritten, dataFilesSkipped, firstErr
}

// dotDataWorker processes files from the jobs channel
func dotDataWorker(csvDir, filter string, jobs <-chan dotDataJob, results chan<- dotDataResult, wg *sync.WaitGroup) {
	defer wg.Done()
	for job := range jobs {
		// Find WAV file with correct case
		baseName := strings.TrimSuffix(job.filename, filepath.Ext(job.filename))
		wavPath := findWAVFile(csvDir, baseName)
		if wavPath == "" {
			results <- dotDataResult{filename: job.filename, written: false, err: nil}
			continue
		}
		dataPath := wavPath + ".data"
		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
		if err != nil {
			results <- dotDataResult{filename: job.filename, written: false, err: nil}
			continue
		}

		// Build segments and metadata
		meta, segments := buildAviaNZMetaAndSegments(job.fileCalls, filter, duration, sampleRate)
		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
			results <- dotDataResult{filename: job.filename, written: false, err: fmt.Errorf("failed to write %s: %w", dataPath, err)}
			continue
		}
		results <- dotDataResult{filename: job.filename, written: true, err: nil}
	}
}

// buildAviaNZMetaAndSegments creates metadata and segments for a .data file
func buildAviaNZMetaAndSegments(calls []ClusteredCall, filter string, duration float64, sampleRate int) (AviaNZMeta, []AviaNZSegment) {
	// Create metadata
	reviewer := "None"
	meta := AviaNZMeta{
		Operator: "Auto",
		Reviewer: &reviewer,
		Duration: duration,
	}

	// Build segments array
	var segments []AviaNZSegment
	for _, call := range calls {
		// Create labels for this segment
		labels := []AviaNZLabel{{
			Species:   call.EbirdCode,
			Certainty: DEFAULT_CERTAINTY,
			Filter:    filter,
		}}

		// Create segment: [start, end, freq_low, freq_high, labels]
		// freq_low=0, freq_high=sampleRate for full-band segments
		segment := AviaNZSegment{
			call.StartTime,
			call.EndTime,
			0,          // freq_low
			sampleRate, // freq_high (full band)
			labels,
		}
		segments = append(segments, segment)
	}
	return meta, segments
}

// writeAviaNZDataFile writes a new .data file to disk (does not check for existing files)
func writeAviaNZDataFile(path string, data []any) error {
	file, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}
	defer func() { _ = file.Close() }()

	encoder := json.NewEncoder(file)
	encoder.SetIndent("", "") // No indentation for compact output
	if err := encoder.Encode(data); err != nil {
		return fmt.Errorf("failed to encode JSON: %w", err)
	}
	return nil
}

// writeDotDataFileSafe safely writes or merges .data files:
//   - If the file doesn't exist: write a new file
//   - If the file exists with the same filter: return an error (refuse to clobber)
//   - If the file exists with a different filter: merge segments and write
//   - If the file exists but can't be parsed: return an error (refuse to clobber)
func writeDotDataFileSafe(path string, newSegments []AviaNZSegment, filter string, meta AviaNZMeta) error {
	// Check if file exists
	if _, err := os.Stat(path); err == nil {
		// File exists - parse and check
		existing, err := utils.ParseDataFile(path)
		if err != nil {
			return fmt.Errorf("cannot parse existing %s: %w (refusing to clobber)", path, err)
		}

		// Check for duplicate filter
		for _, seg := range existing.Segments {
			if seg.HasFilterLabel(filter) {
				return fmt.Errorf("%s already contains filter '%s' (refusing to clobber)", path, filter)
			}
		}

		// Append new segments (different filter - safe to merge)
		for _, newSeg := range newSegments {
			seg := convertAviaNZSegment(newSeg, filter)
			existing.Segments = append(existing.Segments, seg)
		}

		// Sort by start time
		sort.Slice(existing.Segments, func(i, j int) bool {
			return existing.Segments[i].StartTime < existing.Segments[j].StartTime
		})
		return existing.Write(path)
	}

	// File doesn't exist - write new
	data := buildDataFileFromSegments(meta, newSegments)
	return writeAviaNZDataFile(path, data)
}

// convertAviaNZSegment converts an AviaNZSegment to utils.Segment
func convertAviaNZSegment(seg AviaNZSegment, filter string) *utils.Segment {
	labels := seg[4].([]AviaNZLabel)
	utilsLabels := make([]*utils.Label, len(labels))
	for i, l := range labels {
		utilsLabels[i] = &utils.Label{
			Species:   l.Species,
			Certainty: l.Certainty,
			Filter:    filter,
		}
	}

	// Handle freq values (could be int or float64 depending on how they were created)
	var freqLow, freqHigh float64
	switch v := seg[2].(type) {
	case int:
		freqLow = float64(v)
	case float64:
		freqLow = v
	}
	switch v := seg[3].(type) {
	case int:
		freqHigh = float64(v)
	case float64:
		freqHigh = v
	}
	return &utils.Segment{
		StartTime: seg[0].(float64),
		EndTime:   seg[1].(float64),
		FreqLow:   freqLow,
		FreqHigh:  freqHigh,
		Labels:    utilsLabels,
	}
}

// buildDataFileFromSegments builds the data file structure from meta and segments
func buildDataFileFromSegments(meta AviaNZMeta, segments []AviaNZSegment) []any {
	result := make([]any, 0, 1+len(segments))
	result = append(result, meta)
	for _, seg := range segments {
		result = append(result, seg)
	}
	return result
}

// ParseFilterFromFilename extracts the filter name from a preds CSV filename:
// "predsST_opensoundscape-kiwi-1.2_2025-11-12.csv" -> "opensoundscape-kiwi-1.2".
// Returns an empty string if parsing fails.
func ParseFilterFromFilename(csvPath string) string {
	filename := filepath.Base(csvPath)

	// Remove .csv extension
	name := strings.TrimSuffix(filename, ".csv")

	// Split on underscore
	parts := strings.Split(name, "_")
	if len(parts) == 3 {
		return parts[1]
	}
	return ""
}

// clusterStartTimes groups consecutive start times into clusters
// where the gap between consecutive times is <= gapThreshold
func clusterStartTimes(startTimes []float64, gapThreshold float64) [][]float64 {
	if len(startTimes) == 0 {
		return nil
	}
	var clusters [][]float64
	currentCluster := []float64{startTimes[0]}
	for i := 1; i < len(startTimes); i++ {
		gap := startTimes[i] - startTimes[i-1]
		if gap <= gapThreshold {
			// Same cluster
			currentCluster = append(currentCluster, startTimes[i])
		} else {
			// New cluster
			clusters = append(clusters, currentCluster)
			currentCluster = []float64{startTimes[i]}
		}
	}
	// Don't forget the last cluster
	clusters = append(clusters, currentCluster)
	return clusters
}
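// Hedged worked example (not in the original source): how clusterStartTimes
// groups detections under the default gap threshold. With 3-second clips and
// CLUSTER_GAP_MULTIPLIER = 2, the threshold is 6 s, so detections starting at
// 0, 3, and 6 s form one call, while a detection at 30 s starts a new one.
// The start times below are invented for illustration.
package calls

import "fmt"

func ExampleClusterStartTimes() {
	clipDuration := 3.0
	gapThreshold := float64(CLUSTER_GAP_MULTIPLIER) * clipDuration // 6.0 s
	clusters := clusterStartTimes([]float64{0, 3, 6, 30, 33}, gapThreshold)
	for _, c := range clusters {
		// End time is the last detection's start plus the clip duration,
		// mirroring how clusterDetections builds ClusteredCall.EndTime.
		fmt.Printf("call %.0f-%.0fs (%d segments)\n", c[0], c[len(c)-1]+clipDuration, len(c))
	}
	// Prints:
	// call 0-9s (3 segments)
	// call 30-36s (2 segments)
}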
package calls

import (
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"sync"
	"sync/atomic"
)

// CallsFromSourceInput defines the common input for calls-from-source tools
type CallsFromSourceInput struct {
	Folder          string          `json:"folder"`
	File            string          `json:"file"`
	Delete          bool            `json:"delete"`
	ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
}

// CallsFromSourceOutput defines the common output for calls-from-source tools
type CallsFromSourceOutput struct {
	Calls            []ClusteredCall `json:"calls"`
	TotalCalls       int             `json:"total_calls"`
	SpeciesCount     map[string]int  `json:"species_count"`
	DataFilesWritten int             `json:"data_files_written"`
	DataFilesSkipped int             `json:"data_files_skipped"`
	FilesProcessed   int             `json:"files_processed"`
	FilesDeleted     int             `json:"files_deleted"`
	Filter           string          `json:"filter"`
	Error            *string         `json:"error,omitempty"`
}

// CallSource abstracts a source of bird call data (Raven, BirdNET, etc.)
type CallSource interface {
	// Name returns the display name (e.g. "Raven", "BirdNET")
	Name() string
	// FindFiles discovers source files in the given folder
	FindFiles(folder string) ([]string, error)
	// ProcessFile processes a single source file and returns calls, write/skip status
	ProcessFile(path string, cache *DirCache) (calls []ClusteredCall, written, skipped bool, err error)
}

// callsFromSource is the shared entry point for all call source tools.
func callsFromSource(src CallSource, input CallsFromSourceInput) (CallsFromSourceOutput, error) {
	var output CallsFromSourceOutput
	output.Filter = src.Name()

	// Collect source files to process
	var files []string
	if input.File != "" {
		files = []string{input.File}
	} else if input.Folder != "" {
		var err error
		files, err = src.FindFiles(input.Folder)
		if err != nil {
			errMsg := fmt.Sprintf("Failed to find %s files: %v", src.Name(), err)
			output.Error = &errMsg
			return output, fmt.Errorf("%s", errMsg)
		}
	} else {
		errMsg := "Either --folder or --file must be specified"
		output.Error = &errMsg
		return output, fmt.Errorf("%s", errMsg)
	}
	if len(files) == 0 {
		errMsg := fmt.Sprintf("No %s files found", src.Name())
		output.Error = &errMsg
		return output, fmt.Errorf("%s", errMsg)
	}

	// Single file or small batch: process sequentially (avoid goroutine overhead)
	if len(files) < 10 {
		return callsFromSourceSequential(src, input, files)
	}
	// Large batch: parallel processing with DirCache
	return callsFromSourceParallel(src, input, files)
}

// callsFromSourceSequential processes source files one at a time (for small batches)
func callsFromSourceSequential(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
	var output CallsFromSourceOutput
	output.Filter = src.Name()

	// Build DirCache once for the folder
	dirCaches := make(map[string]*DirCache)
	if input.Folder != "" {
		dirCaches[input.Folder] = NewDirCache(input.Folder)
	}

	speciesCount := make(map[string]int)
	var allCalls []ClusteredCall
	dataFilesWritten := 0
	dataFilesSkipped := 0
	filesProcessed := 0
	filesDeleted := 0
	for _, file := range files {
		dir := filepath.Dir(file)
		cache := dirCaches[dir]
		if cache == nil {
			cache = NewDirCache(dir)
			dirCaches[dir] = cache
		}
		calls, written, skipped, err := src.ProcessFile(file, cache)
		if err != nil {
			errMsg := fmt.Sprintf("Error processing %s: %v", file, err)
			output.Error = &errMsg
			return output, fmt.Errorf("%s", errMsg)
		}
		if written {
			dataFilesWritten++
		}
		if skipped {
			dataFilesSkipped++
		}
		for _, call := range calls {
			allCalls = append(allCalls, call)
			speciesCount[call.EbirdCode]++
		}
		filesProcessed++

		// Delete if requested and successfully processed
		if input.Delete && written {
			if err := os.Remove(file); err != nil {
				errMsg := fmt.Sprintf("Failed to delete %s: %v", file, err)
				output.Error = &errMsg
				return output, fmt.Errorf("%s", errMsg)
			}
			filesDeleted++
		}
		if input.ProgressHandler != nil {
			input.ProgressHandler(filesProcessed, len(files), filepath.Base(file))
		}
	}

	// Sort all calls by file, then start time
	sort.Slice(allCalls, func(i, j int) bool {
		if allCalls[i].File != allCalls[j].File {
			return allCalls[i].File < allCalls[j].File
		}
		return allCalls[i].StartTime < allCalls[j].StartTime
	})
	output.Calls = allCalls
	output.TotalCalls = len(allCalls)
	output.SpeciesCount = speciesCount
	output.DataFilesWritten = dataFilesWritten
	output.DataFilesSkipped = dataFilesSkipped
	output.FilesProcessed = filesProcessed
	output.FilesDeleted = filesDeleted
	return output, nil
}

// sourceJob represents a single file to process (generic over CallSource)
type sourceJob struct {
	filePath string
}

// sourceResult represents the result of processing a single source file
type sourceResult struct {
	path    string
	calls   []ClusteredCall
	written bool
	skipped bool
	err     error
}

func (r sourceResult) filePath() string          { return r.path }
func (r sourceResult) getCalls() []ClusteredCall { return r.calls }
func (r sourceResult) wasWritten() bool          { return r.written }
func (r sourceResult) wasSkipped() bool          { return r.skipped }
func (r sourceResult) getError() error           { return r.err }

// callsFromSourceParallel processes source files concurrently using a worker pool and DirCache
func callsFromSourceParallel(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
	var output CallsFromSourceOutput
	output.Filter = src.Name()
	total := len(files)
	var processed atomic.Int32

	// Build DirCache for the folder
	dirCaches := &sync.Map{}
	if input.Folder != "" {
		cache := NewDirCache(input.Folder)
		dirCaches.Store(input.Folder, cache)
	}

	// Create job and result channels
	jobs := make(chan sourceJob, total)
	results := make(chan parallelResult, total)

	// Start workers
	var wg sync.WaitGroup
	for range DOT_DATA_WORKERS {
		wg.Add(1)
		go sourceWorker(src, dirCaches, jobs, results, &wg)
	}

	// Send jobs
	for _, file := range files {
		jobs <- sourceJob{filePath: file}
	}
	close(jobs)

	// Wait for workers to finish, then close results
	go func() {
		wg.Wait()
		close(results)
	}()

	// Collect results with progress reporting
	stats := aggregateResults(results, total, &processed, input.Delete, input.ProgressHandler)
	if stats.firstErr != nil {
		errMsg := stats.firstErr.Error()
		output.Error = &errMsg
		return output, stats.firstErr
	}
	sortCallsByFileAndTime(stats.calls)
	output.Calls = stats.calls
	output.TotalCalls = len(stats.calls)
	output.SpeciesCount = stats.speciesCount
	output.DataFilesWritten = stats.dataFilesWritten
	output.DataFilesSkipped = stats.dataFilesSkipped
	output.FilesProcessed = stats.filesProcessed
	output.FilesDeleted = stats.filesDeleted
	return output, nil
}

// sourceWorker processes source files from the jobs channel
func sourceWorker(src CallSource, dirCaches *sync.Map, jobs <-chan sourceJob, results chan<- parallelResult, wg *sync.WaitGroup) {
	defer wg.Done()
	for job := range jobs {
		dir := filepath.Dir(job.filePath)

		// Get or create DirCache for this directory
		var cache *DirCache
		if cached, ok := dirCaches.Load(dir); ok {
			cache = cached.(*DirCache)
		} else {
			cache = NewDirCache(dir)
			dirCaches.Store(dir, cache)
		}
		calls, written, skipped, err := src.ProcessFile(job.filePath, cache)
		results <- sourceResult{
			path:    job.filePath,
			calls:   calls,
			written: written,
			skipped: skipped,
			err:     err,
		}
	}
}
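// Hedged sketch (not in the original source): the CallSource interface above is
// what a new detector integration would implement. csvSource is a hypothetical
// source for a detector that emits *.dets.csv files, shown only to make the
// interface shape concrete; a real ProcessFile would mirror
// processRavenFileCached (parse rows, resolve the WAV via cache.FindWAV, then
// writeDotDataFileSafe).
package calls

import (
	"os"
	"path/filepath"
	"strings"
)

// csvSource is a hypothetical CallSource implementation.
type csvSource struct{}

func (csvSource) Name() string { return "CSVDetector" }

func (csvSource) FindFiles(folder string) ([]string, error) {
	entries, err := os.ReadDir(folder)
	if err != nil {
		return nil, err
	}
	var files []string
	for _, e := range entries {
		if strings.HasSuffix(e.Name(), ".dets.csv") {
			files = append(files, filepath.Join(folder, e.Name()))
		}
	}
	return files, nil
}

func (csvSource) ProcessFile(path string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	// Parsing elided in this sketch; returning (nil, false, true, nil) marks
	// the file as skipped, matching how the real sources report no-op files.
	return nil, false, true, nil
}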
package calls

import (
	"os"
	"path/filepath"
	"testing"

	"skraak/utils"
)

// ============================================
// BirdNET Tests
// ============================================

func TestCallsFromBirda_NewDataFile(t *testing.T) {
	tmpDir := t.TempDir()

	// Create a minimal WAV file
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)

	// Create BirdNET results file
	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Turdus migratorius,American Robin,0.85,/some/path/test.WAV\n"
	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromBirdaInput{
		File: birdaPath,
	}
	output, err := CallsFromBirda(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "BirdNET" {
		t.Errorf("expected filter 'BirdNET', got '%s'", output.Filter)
	}
	if output.TotalCalls != 1 {
		t.Errorf("expected 1 call, got %d", output.TotalCalls)
	}

	// Verify .data file was created
	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 1 {
		t.Errorf("expected 1 segment, got %d", len(df.Segments))
	}
	if df.Segments[0].Labels[0].Filter != "BirdNET" {
		t.Errorf("expected filter 'BirdNET', got '%s'", df.Segments[0].Labels[0].Filter)
	}
	if df.Segments[0].Labels[0].Certainty != 85 {
		t.Errorf("expected certainty 85, got %d", df.Segments[0].Labels[0].Certainty)
	}
}

func TestCallsFromBirda_ExistingSameFilter(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing Bird", "certainty": 90, "filter": "BirdNET"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}
	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,New Bird,New Bird,0.85,test.WAV\n"
	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromBirdaInput{File: birdaPath}
	output, err := CallsFromBirda(input)
	if err == nil {
		t.Error("expected error for same filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
}

func TestCallsFromBirda_ExistingDifferentFilter(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "Manual"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}
	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n"
	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromBirdaInput{File: birdaPath}
	output, err := CallsFromBirda(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 2 {
		t.Errorf("expected 2 segments after merge, got %d", len(df.Segments))
	}
}

func TestCallsFromBirda_DeleteOption(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n"
	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromBirdaInput{File: birdaPath, Delete: true}
	output, err := CallsFromBirda(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.FilesDeleted != 1 {
		t.Errorf("expected 1 file deleted, got %d", output.FilesDeleted)
	}
	if _, err := os.Stat(birdaPath); !os.IsNotExist(err) {
		t.Error("expected BirdNET file to be deleted")
	}
}

func TestCallsFromBirda_FolderMode(t *testing.T) {
	tmpDir := t.TempDir()
	for i := range 2 {
		wavPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".WAV")
		createMinimalWAV(t, wavPath, 16000, 60.0)
		birdaPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".BirdNET.results.csv")
		birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Bird,Bird,0.85,test.WAV\n"
		if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
			t.Fatal(err)
		}
	}

	input := CallsFromBirdaInput{Folder: tmpDir}
	output, err := CallsFromBirda(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.FilesProcessed != 2 {
		t.Errorf("expected 2 files processed, got %d", output.FilesProcessed)
	}
	if output.DataFilesWritten != 2 {
		t.Errorf("expected 2 data files written, got %d", output.DataFilesWritten)
	}
}

// ============================================
// Raven Tests
// ============================================

func TestCallsFromRaven_NewDataFile(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromRavenInput{File: ravenPath}
	output, err := CallsFromRaven(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "Raven" {
		t.Errorf("expected filter 'Raven', got '%s'", output.Filter)
	}
	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if df.Segments[0].FreqLow != 1000 {
		t.Errorf("expected freq_low 1000, got %f", df.Segments[0].FreqLow)
	}
	if df.Segments[0].FreqHigh != 5000 {
		t.Errorf("expected freq_high 5000, got %f", df.Segments[0].FreqHigh)
	}
}

func TestCallsFromRaven_ExistingSameFilter(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing", "certainty": 90, "filter": "Raven"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}
	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tNew\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromRavenInput{File: ravenPath}
	output, err := CallsFromRaven(input)
	if err == nil {
		t.Error("expected error for same filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
}

func TestCallsFromRaven_ExistingDifferentFilter(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	dataPath := wavPath + ".data"
	existingData := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "BirdNET"}]]]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}
	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tMorepork\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromRavenInput{File: ravenPath}
	output, err := CallsFromRaven(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 2 {
		t.Errorf("expected 2 segments after merge, got %d", len(df.Segments))
	}
}

func TestCallsFromRaven_DeleteOption(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromRavenInput{File: ravenPath, Delete: true}
	output, err := CallsFromRaven(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.FilesDeleted != 1 {
		t.Errorf("expected 1 file deleted, got %d", output.FilesDeleted)
	}
	if _, err := os.Stat(ravenPath); !os.IsNotExist(err) {
		t.Error("expected Raven file to be deleted")
	}
}

func TestCallsFromRaven_MultipleSelections(t *testing.T) {
	tmpDir := t.TempDir()
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)
	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n2\tSpectrogram 1\t1\t10.0\t15.0\t2000\t6000\tMorepork\n3\tSpectrogram 1\t1\t20.0\t25.0\t1500\t4500\tTui\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromRavenInput{File: ravenPath}
	output, err := CallsFromRaven(input)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.TotalCalls != 3 {
		t.Errorf("expected 3 calls, got %d", output.TotalCalls)
	}
	if output.SpeciesCount["Kiwi"] != 1 || output.SpeciesCount["Morepork"] != 1 || output.SpeciesCount["Tui"] != 1 {
		t.Errorf("unexpected species count: %v", output.SpeciesCount)
	}
}
package calls

import (
	"encoding/csv"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"skraak/utils"
)

// CallsFromBirdaInput defines the input for the calls-from-birda tool.
type CallsFromBirdaInput struct {
	Folder          string          `json:"folder"`
	File            string          `json:"file"`
	Delete          bool            `json:"delete"`
	ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
}

// CallsFromBirdaOutput defines the output for the calls-from-birda tool.
type CallsFromBirdaOutput struct {
	Calls            []ClusteredCall `json:"calls"`
	TotalCalls       int             `json:"total_calls"`
	SpeciesCount     map[string]int  `json:"species_count"`
	DataFilesWritten int             `json:"data_files_written"`
	DataFilesSkipped int             `json:"data_files_skipped"`
	FilesProcessed   int             `json:"files_processed"`
	FilesDeleted     int             `json:"files_deleted"`
	Filter           string          `json:"filter"`
	Error            *string         `json:"error,omitempty"`
}

// birdaSource implements CallSource for BirdNET results files.
type birdaSource struct{}

func (birdaSource) Name() string { return "BirdNET" }

func (birdaSource) FindFiles(folder string) ([]string, error) {
	var files []string
	entries, err := os.ReadDir(folder)
	if err != nil {
		return nil, err
	}
	for _, entry := range entries {
		name := entry.Name()
		if strings.HasSuffix(name, ".BirdNET.results.csv") {
			files = append(files, filepath.Join(folder, name))
		}
	}
	return files, nil
}

func (birdaSource) ProcessFile(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	return processBirdaFileCached(birdaFile, cache)
}

// CallsFromBirda processes BirdNET results files and writes .data files.
func CallsFromBirda(input CallsFromBirdaInput) (CallsFromBirdaOutput, error) {
	src := birdaSource{}
	commonInput := CallsFromSourceInput(input)
	commonOutput, err := callsFromSource(src, commonInput)

	// Convert to Birda-specific output type.
	var output CallsFromBirdaOutput
	output.Calls = commonOutput.Calls
	output.TotalCalls = commonOutput.TotalCalls
	output.SpeciesCount = commonOutput.SpeciesCount
	output.DataFilesWritten = commonOutput.DataFilesWritten
	output.DataFilesSkipped = commonOutput.DataFilesSkipped
	output.FilesProcessed = commonOutput.FilesProcessed
	output.FilesDeleted = commonOutput.FilesDeleted
	output.Filter = commonOutput.Filter
	output.Error = commonOutput.Error
	return output, err
}

// BirdNETDetection represents a single BirdNET detection.
type BirdNETDetection struct {
	StartTime      float64
	EndTime        float64
	ScientificName string
	CommonName     string
	Confidence     float64
	WAVPath        string
}

// birdaColumnIndices holds the parsed column positions from a BirdNET CSV header.
type birdaColumnIndices struct {
	startIdx      int
	endIdx        int
	commonNameIdx int
	confidenceIdx int
	fileIdx       int
}

// parseBirdaCSVHeader reads the CSV header row and returns column indices.
func parseBirdaCSVHeader(reader *csv.Reader) (birdaColumnIndices, error) {
	header, err := reader.Read()
	if err != nil {
		return birdaColumnIndices{}, fmt.Errorf("failed to read header: %w", err)
	}
	idx := birdaColumnIndices{startIdx: -1, endIdx: -1, commonNameIdx: -1, confidenceIdx: -1, fileIdx: -1}
	for i, col := range header {
		col = strings.TrimPrefix(col, "\ufeff")
		switch col {
		case "Start (s)":
			idx.startIdx = i
		case "End (s)":
			idx.endIdx = i
		case "Common name":
			idx.commonNameIdx = i
		case "Confidence":
			idx.confidenceIdx = i
		case "File":
			idx.fileIdx = i
		}
	}
	if idx.startIdx == -1 || idx.endIdx == -1 || idx.commonNameIdx == -1 || idx.confidenceIdx == -1 {
		return birdaColumnIndices{}, fmt.Errorf("missing required columns in BirdNET file")
	}
	return idx, nil
}

// readBirdaDetections reads all detection records from a BirdNET CSV.
func readBirdaDetections(reader *csv.Reader, idx birdaColumnIndices) ([]BirdNETDetection, error) {
	var detections []BirdNETDetection
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("failed to read record: %w", err)
		}
		var det BirdNETDetection
		startTime, perr := strconv.ParseFloat(record[idx.startIdx], 64)
		if perr != nil {
			return nil, fmt.Errorf("failed to parse start time %q: %w", record[idx.startIdx], perr)
		}
		det.StartTime = startTime
		endTime, perr := strconv.ParseFloat(record[idx.endIdx], 64)
		if perr != nil {
			return nil, fmt.Errorf("failed to parse end time %q: %w", record[idx.endIdx], perr)
		}
		det.EndTime = endTime
		det.CommonName = record[idx.commonNameIdx]
		confidence, perr := strconv.ParseFloat(record[idx.confidenceIdx], 64)
		if perr != nil {
			return nil, fmt.Errorf("failed to parse confidence %q: %w", record[idx.confidenceIdx], perr)
		}
		det.Confidence = confidence
		if idx.fileIdx >= 0 && idx.fileIdx < len(record) {
			det.WAVPath = record[idx.fileIdx]
		}
		detections = append(detections, det)
	}
	return detections, nil
}

// resolveBirdaWAVPath finds the WAV file associated with a BirdNET results file.
func resolveBirdaWAVPath(birdaFile string, firstWAVPath string, cache *DirCache) string {
	if firstWAVPath != "" {
		if _, err := os.Stat(firstWAVPath); err == nil {
			return firstWAVPath
		}
	}
	dir := filepath.Dir(birdaFile)
	base := filepath.Base(birdaFile)
	baseName := strings.TrimSuffix(base, ".BirdNET.results.csv")
	if cache != nil {
		return cache.FindWAV(baseName)
	}
	return findWAVFile(dir, baseName)
}

// processBirdaFileCached processes a single BirdNET results file using a DirCache for WAV lookup.
func processBirdaFileCached(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	file, err := os.Open(birdaFile)
	if err != nil {
		return nil, false, false, fmt.Errorf("failed to open file: %w", err)
	}
	defer func() { _ = file.Close() }()
	reader := csv.NewReader(file)
	idx, err := parseBirdaCSVHeader(reader)
	if err != nil {
		return nil, false, false, err
	}
	detections, err := readBirdaDetections(reader, idx)
	if err != nil {
		return nil, false, false, err
	}
	if len(detections) == 0 {
		return nil, false, true, nil
	}
	wavPath := resolveBirdaWAVPath(birdaFile, detections[0].WAVPath, cache)
	if wavPath == "" {
		return nil, false, true, nil
	}
	sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
	if err != nil {
		return nil, false, true, nil
	}
	dataPath := wavPath + ".data"
	segments := buildBirdNETSegments(detections, sampleRate)
	meta := AviaNZMeta{Operator: "BirdNET", Duration: duration}
	reviewer := "None"
	meta.Reviewer = &reviewer
	if err := writeDotDataFileSafe(dataPath, segments, "BirdNET", meta); err != nil {
		return nil, false, false, err
	}
	var calls []ClusteredCall
	for _, det := range detections {
		calls = append(calls, ClusteredCall{
			File:      wavPath,
			StartTime: det.StartTime,
			EndTime:   det.EndTime,
			EbirdCode: det.CommonName,
			Segments:  1,
		})
	}
	return calls, true, false, nil
}

// buildBirdNETSegments converts BirdNET detections to AviaNZ segments.
func buildBirdNETSegments(detections []BirdNETDetection, sampleRate int) []AviaNZSegment {
	var segments []AviaNZSegment
	for _, det := range detections {
		// Convert confidence (0.0-1.0) to certainty (0-100).
		certainty := min(max(int(det.Confidence*100), 0), 100)
		labels := []AviaNZLabel{{
			Species:   det.CommonName,
			Certainty: certainty,
			Filter:    "BirdNET",
		}}
		segment := AviaNZSegment{
			det.StartTime,
			det.EndTime,
			0,          // freq_low
			sampleRate, // freq_high (full band)
			labels,
		}
		segments = append(segments, segment)
	}
	return segments
}
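// Editor's example (illustrative sketch, not part of the source tree): the
// confidence-to-certainty clamp used by buildBirdNETSegments above, isolated
// so the boundary behaviour is visible. It relies on the min/max builtins
// (Go 1.21+); the sample values are made up.
package main

import "fmt"

func toCertainty(confidence float64) int {
	// Scale 0.0-1.0 to 0-100 and clamp out-of-range inputs.
	return min(max(int(confidence*100), 0), 100)
}

func main() {
	for _, c := range []float64{0.85, 1.2, -0.1} {
		fmt.Printf("%.2f -> %d\n", c, toCertainty(c)) // 0.85 -> 85, 1.20 -> 100, -0.10 -> 0
	}
}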
package calls

import (
	"os"
	"path/filepath"
	"testing"
)

func TestDetectAnomalies_LabelMismatch(t *testing.T) {
	dir := t.TempDir()
	// Same time range, different calltypes across two models.
	data := `[{"Operator":"test"},` +
		`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
		`{"species":"Kiwi","calltype":"Male","certainty":100,"filter":"model-b"}]]]`
	if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
		t.Fatal(err)
	}
	out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
	if err != nil {
		t.Fatal(err)
	}
	if out.LabelMismatches != 1 {
		t.Errorf("expected 1 label mismatch, got %d", out.LabelMismatches)
	}
	if out.CertaintyMismatches != 0 {
		t.Errorf("expected 0 certainty mismatches, got %d", out.CertaintyMismatches)
	}
	if out.Anomalies[0].Type != "label_mismatch" {
		t.Errorf("expected label_mismatch, got %s", out.Anomalies[0].Type)
	}
}

func TestDetectAnomalies_CertaintyMismatch(t *testing.T) {
	dir := t.TempDir()
	// Same time range, same labels, different certainty.
	data := `[{"Operator":"test"},` +
		`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":90,"filter":"model-a"},` +
		`{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
	if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
		t.Fatal(err)
	}
	out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
	if err != nil {
		t.Fatal(err)
	}
	if out.CertaintyMismatches != 1 {
		t.Errorf("expected 1 certainty mismatch, got %d", out.CertaintyMismatches)
	}
	if out.LabelMismatches != 0 {
		t.Errorf("expected 0 label mismatches, got %d", out.LabelMismatches)
	}
}

func TestDetectAnomalies_NoAnomalyWhenAgreement(t *testing.T) {
	dir := t.TempDir()
	data := `[{"Operator":"test"},` +
		`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
		`{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
	if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
		t.Fatal(err)
	}
	out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
	if err != nil {
		t.Fatal(err)
	}
	if out.AnomaliesTotal != 0 {
		t.Errorf("expected 0 anomalies, got %d", out.AnomaliesTotal)
	}
}

func TestDetectAnomalies_LonelySegmentSkipped(t *testing.T) {
	dir := t.TempDir()
	// model-a has a segment, model-b has no segment in this file.
	data := `[{"Operator":"test"},` +
		`[0,10,100,1000,[{"species":"Kiwi","certainty":100,"filter":"model-a"}]]]`
	if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
		t.Fatal(err)
	}
	out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
	if err != nil {
		t.Fatal(err)
	}
	if out.AnomaliesTotal != 0 {
		t.Errorf("lonely segment should be skipped, got %d anomalies", out.AnomaliesTotal)
	}
	if out.FilesWithAllModels != 0 {
		t.Errorf("file missing a model should not count as FilesWithAllModels")
	}
}

func TestDetectAnomalies_FailsWithOneModel(t *testing.T) {
	dir := t.TempDir()
	_, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a"}})
	if err == nil {
		t.Error("expected error with only 1 model")
	}
}
package calls

import (
	"fmt"
	"os"
	"path/filepath"

	"skraak/utils"
)

type DetectAnomaliesInput struct {
	Folder  string
	Models  []string // at least 2 filter names
	Species []string // optional scope; empty = all species
}

type DetectAnomaliesOutput struct {
	Folder              string    `json:"folder"`
	Models              []string  `json:"models"`
	FilesExamined       int       `json:"files_examined"`
	FilesWithAllModels  int       `json:"files_with_all_models"`
	AnomaliesTotal      int       `json:"anomalies_total"`
	LabelMismatches     int       `json:"label_mismatches"`
	CertaintyMismatches int       `json:"certainty_mismatches"`
	Anomalies           []Anomaly `json:"anomalies,omitempty"`
	Error               string    `json:"error,omitempty"`
}

type Anomaly struct {
	File     string           `json:"file"`
	Type     string           `json:"type"` // "label_mismatch" | "certainty_mismatch"
	Segments []AnomalySegment `json:"segments"`
}

type AnomalySegment struct {
	Model     string  `json:"model"`
	Start     float64 `json:"start"`
	End       float64 `json:"end"`
	Species   string  `json:"species"`
	CallType  string  `json:"calltype,omitempty"`
	Certainty int     `json:"certainty"`
}

// validateAnomalyInput validates the input parameters for DetectAnomalies.
func validateAnomalyInput(input DetectAnomaliesInput) error {
	if len(input.Models) < 2 {
		return fmt.Errorf("at least 2 --model values required")
	}
	for i, a := range input.Models {
		for j, b := range input.Models {
			if i != j && a == b {
				return fmt.Errorf("duplicate --model values are not allowed")
			}
		}
	}
	info, err := os.Stat(input.Folder)
	if err != nil {
		return fmt.Errorf("folder not found: %s", input.Folder)
	}
	if !info.IsDir() {
		return fmt.Errorf("not a directory: %s", input.Folder)
	}
	return nil
}

// DetectAnomalies compares corresponding segments across multiple ML model filters
// within each .data file. Segments are matched by time overlap (same logic as propagate).
// Lonely segments (no overlap in one or more models) are silently skipped.
// Anomalies are flagged when overlapping segments disagree on species+calltype,
// or when labels match but certainty values differ.
func DetectAnomalies(input DetectAnomaliesInput) (DetectAnomaliesOutput, error) {
	folder := filepath.Clean(input.Folder)
	output := DetectAnomaliesOutput{
		Folder: folder,
		Models: input.Models,
	}
	if err := validateAnomalyInput(input); err != nil {
		output.Error = err.Error()
		return output, err
	}
	files, err := utils.FindDataFiles(folder)
	if err != nil {
		output.Error = fmt.Sprintf("list .data files: %v", err)
		return output, fmt.Errorf("%s", output.Error)
	}
	scopeSet := make(map[string]bool, len(input.Species))
	for _, s := range input.Species {
		scopeSet[s] = true
	}
	for _, path := range files {
		df, err := utils.ParseDataFile(path)
		if err != nil {
			continue
		}
		output.FilesExamined++
		anomalies := detectAnomaliesInFile(df, path, input.Models, scopeSet)
		if anomalies == nil {
			// File didn't have all models present.
			continue
		}
		output.FilesWithAllModels++
		for _, a := range anomalies {
			if a.Type == "label_mismatch" {
				output.LabelMismatches++
			} else {
				output.CertaintyMismatches++
			}
		}
		output.Anomalies = append(output.Anomalies, anomalies...)
	}
	output.AnomaliesTotal = len(output.Anomalies)
	return output, nil
}

// labeledSeg pairs a segment with the specific label matching the model filter.
type labeledSeg struct {
	seg   *utils.Segment
	label *utils.Label
}

// detectAnomaliesInFile returns nil if the file doesn't contain all required
// models, otherwise a (possibly empty) slice of anomalies.
func detectAnomaliesInFile(df *utils.DataFile, path string, models []string, scope map[string]bool) []Anomaly {
	modelSegs := collectModelSegments(df, models)
	// Skip file if any model is entirely absent.
	for _, model := range models {
		if len(modelSegs[model]) == 0 {
			return nil
		}
	}
	// Start with a non-nil slice so that "all models present, no anomalies"
	// is distinguishable from the nil "model absent" return above; otherwise
	// the caller would under-count FilesWithAllModels for files in full agreement.
	anomalies := []Anomaly{}
	for _, anchor := range modelSegs[models[0]] {
		if !inScope(anchor, scope) {
			continue
		}
		matches := findOverlappingMatches(anchor, models, modelSegs)
		if matches == nil {
			continue
		}
		group := buildComparisonGroup(anchor, models, matches)
		if a := checkGroupAnomaly(group, path, models); a != nil {
			anomalies = append(anomalies, *a)
		}
	}
	return anomalies
}

// collectModelSegments groups labeled segments by model filter name.
func collectModelSegments(df *utils.DataFile, models []string) map[string][]labeledSeg {
	modelSegs := make(map[string][]labeledSeg, len(models))
	for _, seg := range df.Segments {
		for _, lbl := range seg.Labels {
			for _, model := range models {
				if lbl.Filter == model {
					modelSegs[model] = append(modelSegs[model], labeledSeg{seg: seg, label: lbl})
					break
				}
			}
		}
	}
	return modelSegs
}

// inScope returns true if the anchor's label is within the species scope filter.
func inScope(anchor labeledSeg, scope map[string]bool) bool {
	if len(scope) == 0 {
		return true
	}
	key := anchor.label.Species
	if anchor.label.CallType != "" {
		key += "+" + anchor.label.CallType
	}
	return scope[key] || scope[anchor.label.Species]
}

// findOverlappingMatches returns matches[model] = overlapping segments from that model,
// or nil if any model has no overlap (lonely anchor).
func findOverlappingMatches(anchor labeledSeg, models []string, modelSegs map[string][]labeledSeg) map[string][]labeledSeg {
	matches := make(map[string][]labeledSeg, len(models)-1)
	for _, model := range models[1:] {
		for _, candidate := range modelSegs[model] {
			if overlaps(anchor.seg, candidate.seg) {
				matches[model] = append(matches[model], candidate)
			}
		}
		if len(matches[model]) == 0 {
			return nil
		}
	}
	return matches
}

// buildComparisonGroup assembles anchor + first match per other model.
func buildComparisonGroup(anchor labeledSeg, models []string, matches map[string][]labeledSeg) []labeledSeg {
	group := []labeledSeg{anchor}
	for _, model := range models[1:] {
		group = append(group, matches[model][0])
	}
	return group
}

// checkGroupAnomaly checks a comparison group for label or certainty mismatches.
func checkGroupAnomaly(group []labeledSeg, path string, models []string) *Anomaly {
	refSpecies := group[0].label.Species
	refCallType := group[0].label.CallType
	for _, ls := range group[1:] {
		if ls.label.Species != refSpecies || ls.label.CallType != refCallType {
			a := Anomaly{File: path, Type: "label_mismatch", Segments: buildAnomalySegs(group, models)}
			return &a
		}
	}
	refCertainty := group[0].label.Certainty
	for _, ls := range group[1:] {
		if ls.label.Certainty != refCertainty {
			a := Anomaly{File: path, Type: "certainty_mismatch", Segments: buildAnomalySegs(group, models)}
			return &a
		}
	}
	return nil
}

func buildAnomalySegs(group []labeledSeg, models []string) []AnomalySegment {
	segs := make([]AnomalySegment, len(group))
	for i, ls := range group {
		segs[i] = AnomalySegment{
			Model:     models[i],
			Start:     ls.seg.StartTime,
			End:       ls.seg.EndTime,
			Species:   ls.label.Species,
			CallType:  ls.label.CallType,
			Certainty: ls.label.Certainty,
		}
	}
	return segs
}

// overlaps returns true if two segments share any time overlap.
func overlaps(a, b *utils.Segment) bool {
	return a.StartTime < b.EndTime && b.StartTime < a.EndTime
}
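// Editor's example (illustrative sketch, not part of the source tree): the
// overlap test DetectAnomalies uses to match segments across models. Two
// segments match only when they share a non-empty interior, so intervals
// that merely touch at an endpoint do not count. Types here are hypothetical
// stand-ins for the utils.Segment fields.
package main

import "fmt"

type span struct{ start, end float64 }

func overlaps(a, b span) bool {
	return a.start < b.end && b.start < a.end
}

func main() {
	a := span{0, 10}
	fmt.Println(overlaps(a, span{5, 15}))  // true: shares 5-10
	fmt.Println(overlaps(a, span{10, 20})) // false: only touches at 10
}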
package calls

import (
	"encoding/csv"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"skraak/utils"
)

// --- test helpers (test file only) ---

func writeDataFile(t *testing.T, dir, name string, df *utils.DataFile) {
	t.Helper()
	if err := df.Write(filepath.Join(dir, name)); err != nil {
		t.Fatalf("write .data file %s: %v", name, err)
	}
}

func writeMapping(t *testing.T, dir, json string) {
	t.Helper()
	if err := os.WriteFile(filepath.Join(dir, "mapping.json"), []byte(json), 0644); err != nil {
		t.Fatalf("write mapping.json: %v", err)
	}
}

// parseCSV reads the output CSV, returning header and rows.
func parseCSV(t *testing.T, path string) ([]string, [][]string) {
	t.Helper()
	f, err := os.Open(path)
	if err != nil {
		t.Fatalf("open CSV %s: %v", path, err)
	}
	defer f.Close()
	r := csv.NewReader(f)
	header, err := r.Read()
	if err != nil {
		t.Fatalf("read header: %v", err)
	}
	rows, err := r.ReadAll()
	if err != nil {
		t.Fatalf("read rows: %v", err)
	}
	return header, rows
}

// clipLabels calls CallsClipLabels with standard test parameters.
func clipLabels(t *testing.T, dir string, extra ...func(*CallsClipLabelsInput)) CallsClipLabelsOutput {
	t.Helper()
	input := CallsClipLabelsInput{
		Folder:          dir,
		MappingPath:     filepath.Join(dir, "mapping.json"),
		OutputPath:      filepath.Join(dir, "clip_labels.csv"),
		ClipDuration:    5,
		ClipOverlap:     0,
		MinLabelOverlap: 0.25,
		FinalClip:       "full",
	}
	for _, fn := range extra {
		fn(&input)
	}
	out, err := CallsClipLabels(input)
	if err != nil {
		t.Fatalf("CallsClipLabels: %v", err)
	}
	return out
}

// --- tests ---

func TestClipLabels_RealClassTrue(t *testing.T) {
	dir := t.TempDir()
	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 20},
		Segments: []*utils.Segment{
			{
				StartTime: 3, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
	out := clipLabels(t, dir)
	header, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	// Header: file, start_time, end_time, Kiwi
	if len(header) != 4 || header[3] != "Kiwi" {
		t.Fatalf("header = %v, want [..., Kiwi]", header)
	}
	// Clip 0-5 overlaps segment 3-8 by 2s ≥ 0.25 → Kiwi=True
	// Clip 5-10 overlaps segment 3-8 by 3s ≥ 0.25 → Kiwi=True
	// Clips 10-15, 15-20 → Kiwi=False
	kiwiCol := 3
	for i, row := range rows {
		switch row[1] {
		case "0.0", "5.0":
			if row[kiwiCol] != "True" {
				t.Errorf("row %d (start=%s): Kiwi=%s, want True", i, row[1], row[kiwiCol])
			}
		case "10.0", "15.0":
			if row[kiwiCol] != "False" {
				t.Errorf("row %d (start=%s): Kiwi=%s, want False", i, row[1], row[kiwiCol])
			}
		}
	}
	if out.PerClassTrueCount["Kiwi"] != 2 {
		t.Errorf("PerClassTrueCount[Kiwi] = %d, want 2", out.PerClassTrueCount["Kiwi"])
	}
}

func TestClipLabels_GapClipsAllFalse(t *testing.T) {
	dir := t.TempDir()
	// 15s file, Kiwi segment 0-5 only → clips 5-10 and 10-15 are gaps
	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 15},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
	out := clipLabels(t, dir)
	if out.ClipsAllFalseGap != 2 {
		t.Errorf("ClipsAllFalseGap = %d, want 2", out.ClipsAllFalseGap)
	}
	if out.PerClassTrueCount["Kiwi"] != 1 {
		t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
	}
	if out.RowsWritten != 3 {
		t.Errorf("RowsWritten = %d, want 3", out.RowsWritten)
	}
}

func TestClipLabels_NegativeOverridesPositive(t *testing.T) {
	dir := t.TempDir()
	// Kiwi segment 0-8, Not segment 0-4 → clip 0-5 overlaps both → __NEGATIVE__ wins
	// Clip 5-10 overlaps only Kiwi (3s) → True
	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 10},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
			{
				StartTime: 0, EndTime: 4, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Not", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
	out := clipLabels(t, dir)
	if out.ClipsNegative != 1 {
		t.Errorf("ClipsNegative = %d, want 1", out.ClipsNegative)
	}
	_, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	// Clip 0-5: negative hit → all-False (Not overlaps 0-4 by 4s)
	if rows[0][3] != "False" {
		t.Errorf("clip 0-5 Kiwi = %s, want False (overridden by __NEGATIVE__)", rows[0][3])
	}
	// Clip 5-10: only Kiwi overlaps (3s) → True
	if rows[1][3] != "True" {
		t.Errorf("clip 5-10 Kiwi = %s, want True", rows[1][3])
	}
}

func TestClipLabels_IgnoreExcludesClip(t *testing.T) {
	dir := t.TempDir()
	// Don't Know segment 0-5, Kiwi segment 6-10
	// Clip 0-5 overlaps __IGNORE__ → excluded
	// Clip 5-10 overlaps Kiwi → emitted with True
	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 15},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Don't Know", Certainty: 0, Filter: "f1"}},
			},
			{
				StartTime: 6, EndTime: 10, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Don't Know":{"species":"__IGNORE__"}}`)
	out := clipLabels(t, dir)
	if out.ClipsIgnored != 1 {
		t.Errorf("ClipsIgnored = %d, want 1", out.ClipsIgnored)
	}
	if out.SegmentsIgnored != 1 {
		t.Errorf("SegmentsIgnored = %d, want 1", out.SegmentsIgnored)
	}
	// Only 2 rows: clip 5-10 (Kiwi=True) and clip 10-15 (gap)
	if out.RowsWritten != 2 {
		t.Errorf("RowsWritten = %d, want 2", out.RowsWritten)
	}
}

func TestClipLabels_FilterRestrictsLabels(t *testing.T) {
	dir := t.TempDir()
	// Same time range, two filters. Only "wanted" should contribute.
	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 10},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{
					{Species: "Kiwi", Certainty: 100, Filter: "wanted"},
					{Species: "Not", Certainty: 100, Filter: "unwanted"},
				},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
	out := clipLabels(t, dir, func(in *CallsClipLabelsInput) { in.Filter = "wanted" })
	// Only Kiwi from "wanted" filter → clip 0-5 should be Kiwi=True
	// Not from "unwanted" filter should be ignored → no __NEGATIVE__ override
	if out.ClipsNegative != 0 {
		t.Errorf("ClipsNegative = %d, want 0 (Not filter excluded)", out.ClipsNegative)
	}
	if out.PerClassTrueCount["Kiwi"] != 1 {
		t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
	}
}

func TestClipLabels_MappingCoverageError(t *testing.T) {
	dir := t.TempDir()
	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 10},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Mystery", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
	input := CallsClipLabelsInput{
		Folder:          dir,
		MappingPath:     filepath.Join(dir, "mapping.json"),
		OutputPath:      filepath.Join(dir, "clip_labels.csv"),
		ClipDuration:    5,
		ClipOverlap:     0,
		MinLabelOverlap: 0.25,
		FinalClip:       "full",
	}
	_, err := CallsClipLabels(input)
	if err == nil {
		t.Fatal("expected error for missing species in mapping")
	}
	if !strings.Contains(err.Error(), "Mystery") {
		t.Errorf("error should mention missing species, got: %v", err)
	}
}

func TestClipLabels_AppendMode(t *testing.T) {
	dir := t.TempDir()
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
	// First file
	writeDataFile(t, dir, "a.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 5},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	out1 := clipLabels(t, dir)
	if out1.RowsWritten != 1 {
		t.Fatalf("first run: RowsWritten = %d, want 1", out1.RowsWritten)
	}
	// Second run on the same output file. Simulating append by running again
	// on the same folder should fail with a duplicate error.
	_, err := CallsClipLabels(CallsClipLabelsInput{
		Folder:          dir,
		MappingPath:     filepath.Join(dir, "mapping.json"),
		OutputPath:      filepath.Join(dir, "clip_labels.csv"),
		ClipDuration:    5,
		ClipOverlap:     0,
		MinLabelOverlap: 0.25,
		FinalClip:       "full",
	})
	if err == nil {
		t.Fatal("expected duplicate error on second run with same folder")
	}
	if !strings.Contains(err.Error(), "duplicate") {
		t.Errorf("error should mention duplicate, got: %v", err)
	}
}

func TestClipLabels_MultipleFiles(t *testing.T) {
	dir := t.TempDir()
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
	writeDataFile(t, dir, "a.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 10},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeDataFile(t, dir, "b.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 5},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	out := clipLabels(t, dir)
	if out.DataFilesParsed != 2 {
		t.Errorf("DataFilesParsed = %d, want 2", out.DataFilesParsed)
	}
	// a: 2 clips (0-5, 5-10), b: 1 clip (0-5) = 3 total
	if out.RowsWritten != 3 {
		t.Errorf("RowsWritten = %d, want 3", out.RowsWritten)
	}
	_, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	files := map[string]int{}
	for _, r := range rows {
		files[r[0]]++
	}
	if len(files) != 2 {
		t.Errorf("expected 2 distinct files in CSV, got %d", len(files))
	}
}
package calls

import (
	"encoding/csv"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"slices"
	"sort"
	"strconv"
	"strings"

	"skraak/utils"
)

// CallsClipLabelsInput configures the clip-labels exporter.
type CallsClipLabelsInput struct {
	Folder          string  `json:"folder"`
	MappingPath     string  `json:"mapping"`
	Filter          string  `json:"filter,omitempty"`
	OutputPath      string  `json:"output"`
	ClipDuration    float64 `json:"clip_duration"`
	ClipOverlap     float64 `json:"clip_overlap"`
	MinLabelOverlap float64 `json:"min_label_overlap"`
	FinalClip       string  `json:"final_clip"`
}

// CallsClipLabelsOutput summarises a run.
type CallsClipLabelsOutput struct {
	Folder            string         `json:"folder"`
	OutputPath        string         `json:"output"`
	Filter            string         `json:"filter,omitempty"`
	Classes           []string       `json:"classes"`
	DataFilesParsed   int            `json:"data_files_parsed"`
	ClipsNegative     int            `json:"clips_negative"`      // emitted, all-False because of __NEGATIVE__
	ClipsIgnored      int            `json:"clips_ignored"`       // excluded from output because of __IGNORE__ overlap
	SegmentsIgnored   int            `json:"segments_ignored"`    // segments whose species maps to __IGNORE__
	ClipsAllFalseGap  int            `json:"clips_all_false_gap"` // emitted, all-False because no overlap
	PerClassTrueCount map[string]int `json:"per_class_true_count"`
	AppendedToFile    bool           `json:"appended_to_file"`
	ExistingRowsFound int            `json:"existing_rows_found"`
	RowsWritten       int            `json:"rows_written"`
}

// resolvedSeg is a segment that has been classified by the mapping and is
// ready for overlap-checking against clip windows.
type resolvedSeg struct {
	start, end float64
	kind       utils.MappingKind
	classIdx   int // valid only when kind == utils.MappingReal
}

// clipDisposition describes the outcome for a single clip window.
type clipDisposition int

const (
	dispoLabelled clipDisposition = iota // at least one class column is True
	dispoNegative                        // __NEGATIVE__ hit, all class columns False
	dispoGap                             // no segment overlaps, all class columns False
	dispoIgnored                         // __IGNORE__ hit, clip excluded from output
)

// clipLabelsRow is one row of the output CSV.
type clipLabelsRow struct {
	file  string
	start float64
	end   float64
	flags []bool
}

// rowKey is used for duplicate detection.
type rowKey struct {
	file  string
	start string
	end   string
}

// parsedClipFile holds a parsed .data file for clip-labels processing.
type parsedClipFile struct {
	path string
	df   *utils.DataFile
}

// validateClipLabelsInput validates the input parameters and returns the parsed finalClipMode.
func validateClipLabelsInput(input CallsClipLabelsInput) (utils.FinalClipMode, error) {
	finalClipMode, err := utils.ParseFinalClipMode(input.FinalClip)
	if err != nil {
		return 0, err
	}
	if input.ClipDuration <= 0 {
		return 0, fmt.Errorf("--clip-duration must be > 0, got %v", input.ClipDuration)
	}
	if input.ClipOverlap < 0 || input.ClipOverlap >= input.ClipDuration {
		return 0, fmt.Errorf("--clip-overlap must be in [0, clip-duration), got %v", input.ClipOverlap)
	}
	if input.MinLabelOverlap <= 0 {
		return 0, fmt.Errorf("--min-label-overlap must be > 0, got %v", input.MinLabelOverlap)
	}
	return finalClipMode, nil
}

// parseClipLabelsDataFiles finds and parses .data files, collecting species seen.
func parseClipLabelsDataFiles(folder, filter string, mapping utils.MappingFile) ([]parsedClipFile, error) {
	dataPaths, err := utils.FindDataFiles(folder)
	if err != nil {
		return nil, fmt.Errorf("scan folder %s: %w", folder, err)
	}
	if len(dataPaths) == 0 {
		return nil, fmt.Errorf("no .data files found in %s", folder)
	}
	speciesSeen := map[string]bool{}
	parsed := make([]parsedClipFile, 0, len(dataPaths))
	for _, p := range dataPaths {
		df, err := utils.ParseDataFile(p)
		if err != nil {
			return nil, fmt.Errorf("parse %s: %w", p, err)
		}
		if df.Meta == nil || df.Meta.Duration <= 0 {
			return nil, fmt.Errorf("missing or non-positive Duration in %s (cannot generate clips)", p)
		}
		for _, seg := range df.Segments {
			for _, lbl := range seg.Labels {
				if filter != "" && lbl.Filter != filter {
					continue
				}
				speciesSeen[lbl.Species] = true
			}
		}
		parsed = append(parsed, parsedClipFile{path: p, df: df})
	}
	if missing := mapping.ValidateCoversSpecies(speciesSeen); len(missing) > 0 {
		return nil, fmt.Errorf("mapping.json is missing entries for species: %s\n(run /data-mapping to regenerate)", strings.Join(missing, ", "))
	}
	return parsed, nil
}

// dedupClipLabelsRows checks for duplicate rows within new rows and against existing CSV rows.
func dedupClipLabelsRows(rows []clipLabelsRow, existing map[rowKey]bool) error {
	dedup := make(map[rowKey]bool, len(existing)+len(rows))
	for k := range existing {
		dedup[k] = true
	}
	for _, r := range rows {
		k := rowKey{file: r.file, start: formatTime(r.start), end: formatTime(r.end)}
		if dedup[k] {
			return fmt.Errorf("duplicate clip detected: file=%s start=%s end=%s", k.file, k.start, k.end)
		}
		dedup[k] = true
	}
	return nil
}

// CallsClipLabels reads .data files from a single folder and writes a CSV in
// OpenSoundScape's clip_labels format: one row per clip per file, with one
// True/False column per class in the mapping.
//
// Mirrors BoxedAnnotations.clip_labels(): every clip window is emitted; a
// column is True when any annotation of that class overlaps the window by
// ≥ min_label_overlap seconds. Sentinel mappings (__NEGATIVE__, __IGNORE__)
// get no column and contribute no labels.
func CallsClipLabels(input CallsClipLabelsInput) (CallsClipLabelsOutput, error) {
	out := CallsClipLabelsOutput{
		Folder:            input.Folder,
		OutputPath:        input.OutputPath,
		PerClassTrueCount: map[string]int{},
	}
	finalClipMode, err := validateClipLabelsInput(input)
	if err != nil {
		return out, err
	}
	mapping, err := utils.LoadMappingFile(input.MappingPath)
	if err != nil {
		return out, fmt.Errorf("load mapping %s: %w", input.MappingPath, err)
	}
	classes := mapping.Classes()
	if len(classes) == 0 {
		return out, fmt.Errorf("mapping.json has no real (non-sentinel) classes")
	}
	out.Classes = classes
	out.Filter = input.Filter
	classIdx := map[string]int{}
	for i, c := range classes {
		classIdx[c] = i
	}
	parsed, err := parseClipLabelsDataFiles(input.Folder, input.Filter, mapping)
	if err != nil {
		return out, err
	}
	out.DataFilesParsed = len(parsed)
	expectedHeader := append([]string{"file", "start_time", "end_time"}, classes...)
	existing, appendMode, err := loadExistingRows(input.OutputPath, expectedHeader)
	if err != nil {
		return out, err
	}
	out.AppendedToFile = appendMode
	out.ExistingRowsFound = len(existing)
	cwd, err := os.Getwd()
	if err != nil {
		return out, fmt.Errorf("getwd: %w", err)
	}
	folderAbs, err := filepath.Abs(input.Folder)
	if err != nil {
		return out, fmt.Errorf("abs %s: %w", input.Folder, err)
	}
	rows := make([]clipLabelsRow, 0, 1024)
	for _, pf := range parsed {
		fileRows, err := processClipLabelsFile(pf.path, pf.df, mapping, classIdx, classes, input, finalClipMode, cwd, folderAbs, &out)
		if err != nil {
			return out, err
		}
		rows = append(rows, fileRows...)
	}
	if err := dedupClipLabelsRows(rows, existing); err != nil {
		return out, err
	}
	if err := writeRows(input.OutputPath, expectedHeader, rows, appendMode); err != nil {
		return out, err
	}
	out.RowsWritten = len(rows)
	sort.Strings(out.Classes)
	return out, nil
}

// processClipLabelsFile generates clip-labels rows for a single .data file.
func processClipLabelsFile(
	path string,
	df *utils.DataFile,
	mapping utils.MappingFile,
	classIdx map[string]int,
	classes []string,
	input CallsClipLabelsInput,
	finalClipMode utils.FinalClipMode,
	cwd, folderAbs string,
	out *CallsClipLabelsOutput,
) ([]clipLabelsRow, error) {
	windows, err := utils.GenerateClipTimes(
		df.Meta.Duration,
		input.ClipDuration,
		input.ClipOverlap,
		finalClipMode,
		10,
	)
	if err != nil {
		return nil, fmt.Errorf("generate clip windows for %s: %w", path, err)
	}
	if len(windows) == 0 {
		return nil, nil
	}
	segs := resolveSegments(df.Segments, input.Filter, input.MinLabelOverlap, mapping, classIdx, out)
	rel, err := computeWavRelPath(path, cwd, folderAbs)
	if err != nil {
		return nil, err
	}
	return labelClipWindows(windows, segs, rel, classes, input.MinLabelOverlap, out), nil
}

// resolveSegments maps segments to their classification and filters out mismatches.
func resolveSegments(
	segments []*utils.Segment,
	filter string,
	minLabelOverlap float64,
	mapping utils.MappingFile,
	classIdx map[string]int,
	out *CallsClipLabelsOutput,
) []resolvedSeg {
	segs := make([]resolvedSeg, 0, len(segments))
	for _, seg := range segments {
		if seg.EndTime-seg.StartTime < minLabelOverlap {
			continue
		}
		for _, lbl := range seg.Labels {
			if filter != "" && lbl.Filter != filter {
				continue
			}
			canon, kind, ok := mapping.Classify(lbl.Species)
			if !ok {
				continue
			}
			switch kind {
			case utils.MappingIgn:
				out.SegmentsIgnored++
				segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind})
			case utils.MappingNeg:
				segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind})
			case utils.MappingReal:
				idx, present := classIdx[canon]
				if !present {
					continue
				}
				segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind, classIdx: idx})
			}
		}
	}
	return segs
}

// computeWavRelPath computes the relative path from cwd to the WAV file corresponding to a .data file.
func computeWavRelPath(dataPath, cwd, folderAbs string) (string, error) {
	wavName := strings.TrimSuffix(filepath.Base(dataPath), ".data")
	wavAbs := filepath.Join(folderAbs, wavName)
	rel, err := filepath.Rel(cwd, wavAbs)
	if err != nil {
		rel = wavAbs
	}
	// Ensure relative paths start with ./ to match OPSO / pandas convention.
	if rel != "" && !filepath.IsAbs(rel) && !strings.HasPrefix(rel, "."+string(filepath.Separator)) {
		rel = "." + string(filepath.Separator) + rel
	}
	return rel, nil
}

// labelClipWindows classifies each clip window and builds the output rows.
func labelClipWindows(windows []utils.ClipWindow, segs []resolvedSeg, rel string, classes []string, minLabelOverlap float64, out *CallsClipLabelsOutput) []clipLabelsRow {
	var rows []clipLabelsRow
	for _, w := range windows {
		dispo, classHits := classifyClip(w, segs, minLabelOverlap, len(classes))
		if dispo == dispoIgnored {
			out.ClipsIgnored++
			continue
		}
		row := clipLabelsRow{
			file:  rel,
			start: w.Start,
			end:   w.End,
			flags: make([]bool, len(classes)),
		}
		switch dispo {
		case dispoNegative:
			out.ClipsNegative++
		case dispoGap:
			out.ClipsAllFalseGap++
		case dispoLabelled:
			for i, hit := range classHits {
				if hit {
					row.flags[i] = true
					out.PerClassTrueCount[classes[i]]++
				}
			}
		}
		rows = append(rows, row)
	}
	return rows
}

// classifyClip determines the disposition of a single clip window against
// the resolved segments. Priority: __IGNORE__ > __NEGATIVE__ > class labels.
func classifyClip(w utils.ClipWindow, segs []resolvedSeg, minLabelOverlap float64, nClasses int) (clipDisposition, []bool) {
	ignoreHit := false
	negativeHit := false
	classHits := make([]bool, nClasses)
	for _, s := range segs {
		if overlapSeconds(s.start, s.end, w.Start, w.End) < minLabelOverlap {
			continue
		}
		switch s.kind {
		case utils.MappingIgn:
			ignoreHit = true
		case utils.MappingNeg:
			negativeHit = true
		case utils.MappingReal:
			classHits[s.classIdx] = true
		}
	}
	if ignoreHit {
		return dispoIgnored, nil
	}
	if negativeHit {
		return dispoNegative, classHits
	}
	for _, hit := range classHits {
		if hit {
			return dispoLabelled, classHits
		}
	}
	return dispoGap, classHits
}

// loadExistingRows reads an existing output CSV and returns its row keys
// (for deduplication) and whether we're in append mode.
func loadExistingRows(outputPath string, expectedHeader []string) (map[rowKey]bool, bool, error) {
	fi, err := os.Stat(outputPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, false, nil
		}
		return nil, false, fmt.Errorf("stat %s: %w", outputPath, err)
	}
	if fi.Size() == 0 {
		return nil, false, nil
	}
	f, err := os.Open(outputPath)
	if err != nil {
		return nil, false, fmt.Errorf("open existing %s: %w", outputPath, err)
	}
	defer func() { _ = f.Close() }()
	r := csv.NewReader(f)
	r.FieldsPerRecord = -1
	header, err := r.Read()
	if err != nil {
		return nil, false, fmt.Errorf("read header of existing %s: %w", outputPath, err)
	}
	if !slices.Equal(header, expectedHeader) {
		return nil, false, fmt.Errorf("column-set mismatch in existing %s\n  existing: %s\n  new:      %s",
			outputPath, strings.Join(header, ","), strings.Join(expectedHeader, ","))
	}
	existing := map[rowKey]bool{}
	for {
		rec, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, false, fmt.Errorf("read row of existing %s: %w", outputPath, err)
		}
		if len(rec) < 3 {
			return nil, false, fmt.Errorf("malformed row in existing %s: %v", outputPath, rec)
		}
		existing[rowKey{file: rec[0], start: rec[1], end: rec[2]}] = true
	}
	return existing, true, nil
}

// overlapSeconds returns the duration of overlap between two half-open intervals.
func overlapSeconds(aStart, aEnd, bStart, bEnd float64) float64 {
	lo := max(aStart, bStart)
	hi := min(aEnd, bEnd)
	if hi <= lo {
		return 0
	}
	return hi - lo
}

// formatTime renders a float to match pandas' default float repr in to_csv:
// always at least one decimal place, no trailing zeros beyond what's needed.
// e.g. 5 → "5.0", 5.5 → "5.5", 3.5001250000 → "3.500125".
func formatTime(v float64) string {
	s := strconv.FormatFloat(v, 'f', -1, 64)
	if !strings.ContainsRune(s, '.') {
		s += ".0"
	}
	return s
}

// writeRows writes the clip-labels rows to a CSV file.
func writeRows(path string, header []string, rows []clipLabelsRow, appendMode bool) error {
	var f *os.File
	var err error
	if appendMode {
		f, err = os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0644)
	} else {
		f, err = os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
	}
	if err != nil {
		return fmt.Errorf("open %s for write: %w", path, err)
	}
	defer func() { _ = f.Close() }()
	w := csv.NewWriter(f)
	if !appendMode {
		if err := w.Write(header); err != nil {
			return fmt.Errorf("write header: %w", err)
		}
	}
	if len(rows) == 0 {
		w.Flush()
		return w.Error()
	}
	rec := make([]string, 3+len(rows[0].flags))
	for _, r := range rows {
		rec[0] = r.file
		rec[1] = formatTime(r.start)
		rec[2] = formatTime(r.end)
		for i, b := range r.flags {
			if b {
				rec[3+i] = "True"
			} else {
				rec[3+i] = "False"
			}
		}
		if err := w.Write(rec); err != nil {
			return fmt.Errorf("write row: %w", err)
		}
	}
	w.Flush()
	return w.Error()
}
package calls

import (
	"encoding/binary"
	"math"
	"os"
	"testing"

	"skraak/utils"
)

const benchWAV = "../../audio/20211028_211500.WAV"

// ==================== WAV I/O ====================

func BenchmarkReadWAV(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		_, _, err := utils.ReadWAVSamples(benchWAV)
		if err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkConvertToFloat64_16bit(b *testing.B) {
	// Simulate 16-bit mono WAV data (same size as test file: 14.32M samples).
	numSamples := 14320000
	data := make([]byte, numSamples*2)
	for i := range numSamples {
		binary.LittleEndian.PutUint16(data[i*2:], uint16(i%65536))
	}
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		_ = convertToFloat64Bench(data, 16, 1)
	}
}

// convertToFloat64Bench duplicates convertToFloat64 for benchmarking (unexported in utils).
func convertToFloat64Bench(data []byte, bitsPerSample, channels int) []float64 {
	bytesPerSample := bitsPerSample / 8
	blockAlign := bytesPerSample * channels
	numSamples := len(data) / blockAlign
	samples := make([]float64, numSamples)
	for i := range numSamples {
		offset := i * blockAlign
		sample := int16(binary.LittleEndian.Uint16(data[offset : offset+2]))
		samples[i] = float64(sample) / 32768.0
	}
	return samples
}

func BenchmarkWriteWAV(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	b.Logf("segment samples=%d", len(segSamples))
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		f, _ := os.CreateTemp("", "bench_*.wav")
		utils.WriteWAVFile(f.Name(), segSamples, sr)
		f.Close()
		os.Remove(f.Name())
	}
}

// ==================== Resample ====================

func BenchmarkResampleRate_48k(b *testing.B) {
	samples, _, _ := utils.ReadWAVSamples(benchWAV)
	b.Logf("resampling %d samples 48000->16000", len(samples))
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		utils.ResampleRate(samples, 48000, 16000)
	}
}

func BenchmarkResampleRate_250k(b *testing.B) {
	samples, _, _ := utils.ReadWAVSamples(benchWAV)
	b.Logf("resampling %d samples 250000->16000", len(samples))
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		utils.ResampleRate(samples, 250000, 16000)
	}
}

// ==================== Spectrogram pipeline ====================

func BenchmarkExtractSegment(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	b.Logf("full file: %d samples, sr=%d", len(samples), sr)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		seg := utils.ExtractSegmentSamples(samples, sr, 872, 895)
		if len(seg) == 0 {
			b.Fatal("empty segment")
		}
	}
}

func BenchmarkPowerSpectrumFFT_512(b *testing.B) {
	n := 512
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	frameData := make([]float64, n)
	power := make([]float64, n/2+1)
	scratch := make([]complex128, n)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		// Simulate the windowing step (Hann) + FFT.
		for j := range n {
			frameData[j] = segSamples[j] * 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(j)/float64(n-1)))
		}
		utils.PowerSpectrumFFT(frameData, power, scratch)
	}
}

func BenchmarkSpectrogram_23s(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	b.Logf("segment samples=%d, windowSize=%d, hopSize=%d", len(segSamples), cfg.WindowSize, cfg.HopSize)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		if spect == nil {
			b.Fatal("nil spectrogram")
		}
	}
}

func BenchmarkSpectrogram_60s(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 0, 60)
	cfg := utils.DefaultSpectrogramConfig(16000)
	b.Logf("60s segment samples=%d", len(segSamples))
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		if spect == nil {
			b.Fatal("nil spectrogram")
		}
	}
}

// ==================== Image creation & resize ====================

func BenchmarkCreateGrayscaleImage(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		img := utils.CreateGrayscaleImage(spect)
		if img == nil {
			b.Fatal("nil image")
		}
	}
}

func BenchmarkCreateRGBImage(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		colorData := utils.ApplyL4Colormap(spect)
		img := utils.CreateRGBImage(colorData)
		if img == nil {
			b.Fatal("nil image")
		}
	}
}

func BenchmarkApplyL4Colormap(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		colorData := utils.ApplyL4Colormap(spect)
		if colorData == nil {
			b.Fatal("nil colormap")
		}
	}
}

func BenchmarkResizeGray224(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	img := utils.CreateGrayscaleImage(spect)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		resized := utils.ResizeImage(img, 224, 224)
		if resized == nil {
			b.Fatal("nil resize")
		}
	}
}

func BenchmarkResizeGray448(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	img := utils.CreateGrayscaleImage(spect)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		resized := utils.ResizeImage(img, 448, 448)
		if resized == nil {
			b.Fatal("nil resize")
		}
	}
}

// ==================== PNG write ====================

func BenchmarkWritePNG_224(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	img := utils.CreateGrayscaleImage(spect)
	resized := utils.ResizeImage(img, 224, 224)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		f, _ := os.CreateTemp("", "bench_*.png")
		utils.WritePNG(resized, f)
		f.Close()
		os.Remove(f.Name())
	}
}

// ==================== Full pipeline ====================

func BenchmarkFullPipelineGray224(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
		outputSR := sr
		if sr > 16000 {
			segSamples = utils.ResampleRate(segSamples, sr, 16000)
			outputSR = 16000
		}
		cfg := utils.DefaultSpectrogramConfig(outputSR)
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		img := utils.CreateGrayscaleImage(spect)
		resized := utils.ResizeImage(img, 224, 224)
		f, _ := os.CreateTemp("", "bench_*.png")
		utils.WritePNG(resized, f)
		f.Close()
		os.Remove(f.Name())
		utils.WriteWAVFile(f.Name(), segSamples, outputSR)
		os.Remove(f.Name())
		_ = resized
	}
}

func BenchmarkFullPipelineColor448(b *testing.B) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
		outputSR := sr
		if sr > 16000 {
			segSamples = utils.ResampleRate(segSamples, sr, 16000)
			outputSR = 16000
		}
		cfg := utils.DefaultSpectrogramConfig(outputSR)
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		colorData := utils.ApplyL4Colormap(spect)
		img := utils.CreateRGBImage(colorData)
		resized := utils.ResizeImage(img, 448, 448)
		f, _ := os.CreateTemp("", "bench_*.png")
		utils.WritePNG(resized, f)
		f.Close()
		os.Remove(f.Name())
		utils.WriteWAVFile(f.Name(), segSamples, outputSR)
		os.Remove(f.Name())
		_ = resized
	}
}

// ==================== Data dimension report ====================

func TestPipelineDimensions(t *testing.T) {
	samples, sr, _ := utils.ReadWAVSamples(benchWAV)
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	t.Logf("Input: %d samples, sr=%d, segment=%d samples (%.1fs)",
		len(samples), sr, len(segSamples), float64(len(segSamples))/float64(sr))
	cfg := utils.DefaultSpectrogramConfig(16000)
	numFrames := (len(segSamples)-cfg.WindowSize)/cfg.HopSize + 1
	numBins := cfg.WindowSize/2 + 1
	t.Logf("Spectrogram: %d freq bins x %d time frames = %d values",
		numBins, numFrames, numBins*numFrames)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	t.Logf("Output: %d x %d (freq x time)", len(spect), len(spect[0]))
	img := utils.CreateGrayscaleImage(spect)
	t.Logf("Grayscale image: %dx%d pixels, %d bytes",
		img.Bounds().Dx(), img.Bounds().Dy(), img.Bounds().Dx()*img.Bounds().Dy())
	resized := utils.ResizeImage(img, 224, 224)
	t.Logf("Resized 224: %dx%d", resized.Bounds().Dx(), resized.Bounds().Dy())
	resized448 := utils.ResizeImage(img, 448, 448)
	t.Logf("Resized 448: %dx%d", resized448.Bounds().Dx(), resized448.Bounds().Dy())
}
package callsimport ("fmt""image""math""os""path/filepath""runtime""strings""sync""skraak/utils")// CallsClipInput defines the input for the clip tooltype CallsClipInput struct {File string `json:"file"`Folder string `json:"folder"`Output string `json:"output"`Prefix string `json:"prefix"`Filter string `json:"filter"`Species string `json:"species"`Certainty int `json:"certainty"`Size int `json:"size"`Color bool `json:"color"`Night bool `json:"night"`Day bool `json:"day"`Location string `json:"location,omitempty"`}// CallsClipOutput defines the output for the clip tooltype CallsClipOutput struct {FilesProcessed int `json:"files_processed"`SegmentsClipped int `json:"segments_clipped"`NightSkipped int `json:"night_skipped,omitempty"`DaySkipped int `json:"day_skipped,omitempty"`OutputFiles []string `json:"output_files"`Errors []string `json:"errors,omitempty"`}// CallsClip processes .data files and generates audio/image clips for matching segmentsfunc CallsClip(input CallsClipInput) (CallsClipOutput, error) {var output CallsClipOutput// Validate required flagsif err := validateClipInput(&output, input); err != nil {return output, err}// Parse species+calltypespeciesName, callType := utils.ParseSpeciesCallType(input.Species)// Get list of .data filesfilePaths, err := resolveClipFiles(&output, input)if err != nil {return output, err}// Create output folder if it doesn't existif err := os.MkdirAll(input.Output, 0755); err != nil {output.Errors = append(output.Errors, fmt.Sprintf("failed to create output folder: %v", err))return output, err}// Clamp image size to valid rangeimgSize := utils.ClampImageSize(input.Size)// Parse location into lat/lng/timezonevar lat, lng float64var timezone stringif input.Location != "" {var err errorlat, lng, timezone, err = utils.ParseLocation(input.Location)if err != nil {output.Errors = append(output.Errors, err.Error())return output, err}}// Process .data files (parallel for larger batches)if len(filePaths) <= 2 {processFilesSequential(&output, filePaths, input, speciesName, callType, imgSize, lat, lng, timezone)} else {processFilesParallel(&output, filePaths, input, speciesName, callType, imgSize, lat, lng, timezone)}return output, nil}// validateClipInput validates required flags for clip generation.func validateClipInput(output *CallsClipOutput, input CallsClipInput) error {if input.File == "" && input.Folder == "" {output.Errors = append(output.Errors, "either --file or --folder is required")return fmt.Errorf("missing required flag: --file or --folder")}if input.Output == "" {output.Errors = append(output.Errors, "--output is required")return fmt.Errorf("missing required flag: --output")}if input.Prefix == "" {output.Errors = append(output.Errors, "--prefix is required")return fmt.Errorf("missing required flag: --prefix")}return nil}// resolveClipFiles returns the list of .data file paths from input.func resolveClipFiles(output *CallsClipOutput, input CallsClipInput) ([]string, error) {if input.File != "" {return []string{input.File}, nil}filePaths, err := utils.FindDataFiles(input.Folder)if err != nil {output.Errors = append(output.Errors, fmt.Sprintf("failed to find .data files: %v", err))return nil, err}if len(filePaths) == 0 {output.Errors = append(output.Errors, "no .data files found")return nil, fmt.Errorf("no .data files found")}return filePaths, nil}// processFilesSequential processes .data files one at a time.func processFilesSequential(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, 
	lng float64, timezone string) {
	for _, dataPath := range filePaths {
		clips, skipped, errs := processFile(dataPath, input.Output, input.Prefix, input.Filter, speciesName, callType, input.Certainty, imgSize, input.Color, input.Night, input.Day, lat, lng, timezone)
		accumulateFileResult(output, clips, skipped, errs, input.Night)
	}
}

// processFilesParallel processes .data files using worker goroutines.
func processFilesParallel(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
	type fileResult struct {
		clips   []string
		skipped int
		errs    []string
	}
	workers := min(runtime.NumCPU(), 8, len(filePaths))
	jobs := make(chan string, len(filePaths))
	results := make(chan fileResult, len(filePaths))
	var wg sync.WaitGroup
	for range workers {
		wg.Go(func() {
			for dataPath := range jobs {
				clips, skipped, errs := processFile(dataPath, input.Output, input.Prefix, input.Filter, speciesName, callType, input.Certainty, imgSize, input.Color, input.Night, input.Day, lat, lng, timezone)
				results <- fileResult{clips: clips, skipped: skipped, errs: errs}
			}
		})
	}
	for _, dataPath := range filePaths {
		jobs <- dataPath
	}
	close(jobs)
	go func() {
		wg.Wait()
		close(results)
	}()
	for r := range results {
		accumulateFileResult(output, r.clips, r.skipped, r.errs, input.Night)
	}
}

// accumulateFileResult merges a single file's results into the output.
func accumulateFileResult(output *CallsClipOutput, clips []string, skipped int, errs []string, night bool) {
	output.SegmentsClipped += len(clips)
	if night {
		output.NightSkipped += skipped
	} else {
		output.DaySkipped += skipped
	}
	output.OutputFiles = append(output.OutputFiles, clips...)
	output.Errors = append(output.Errors, errs...)
	if len(clips) > 0 || len(errs) == 0 {
		output.FilesProcessed++
	}
}

// processFile processes a single .data file and returns generated clips, time-filter-skipped count, and errors
func processFile(dataPath, outputDir, prefix, filter, speciesName, callType string, certainty, imgSize int, color, night, day bool, lat, lng float64, timezone string) ([]string, int, []string) {
	var clips []string
	var errors []string

	// Parse .data file
	dataFile, err := utils.ParseDataFile(dataPath)
	if err != nil {
		errors = append(errors, fmt.Sprintf("%s: failed to parse: %v", dataPath, err))
		return nil, 0, errors
	}

	// Get WAV basename (without path and extensions)
	wavPath := filepath.Clean(strings.TrimSuffix(dataPath, ".data"))
	basename := filepath.Base(wavPath)
	basename = strings.TrimSuffix(basename, filepath.Ext(basename))

	// Filter segments
	matchingSegments := filterSegments(dataFile.Segments, filter, speciesName, callType, certainty)
	if len(matchingSegments) == 0 {
		return nil, 0, nil
	}

	// Day/night filter: check WAV header only (cheaper than reading full audio).
	if night || day {
		skipped, err := checkDayNightFilter(wavPath, night, day, lat, lng, timezone)
		if err != nil || skipped {
			if skipped {
				return nil, 1, nil
			}
			return nil, 0, nil
		}
	}

	// Read WAV samples once
	samples, sampleRate, err := utils.ReadWAVSamples(wavPath)
	if err != nil {
		errors = append(errors, fmt.Sprintf("%s: failed to read WAV: %v", dataPath, err))
		return nil, 0, errors
	}

	// Process matching segments
	clips, errors = processSegments(matchingSegments, dataPath, samples, sampleRate, outputDir, prefix, basename, imgSize, color)
	return clips, 0, errors
}

// filterSegments returns segments matching the given filter criteria.
func filterSegments(segments []*utils.Segment, filter, speciesName, callType string, certainty int) []*utils.Segment {
	var matching []*utils.Segment
	for _, seg := range segments {
		if seg.SegmentMatchesFilters(filter, speciesName, callType, certainty) {
			matching = append(matching, seg)
		}
	}
	return matching
}

// checkDayNightFilter applies day/night filtering. Returns (skipped=true, nil) if the
// recording should be skipped, (false, nil) if it passes, or (false, err) on failure.
func checkDayNightFilter(wavPath string, night, day bool, lat, lng float64, timezone string) (bool, error) {
	result, err := IsNight(IsNightInput{
		FilePath: wavPath,
		Lat:      lat,
		Lng:      lng,
		Timezone: timezone,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
		return false, err
	}
	if night && !result.SolarNight {
		fmt.Fprintf(os.Stderr, "skipped (daytime): %s\n", wavPath)
		return true, nil
	}
	if day && !result.DiurnalActive {
		fmt.Fprintf(os.Stderr, "skipped (nighttime): %s\n", wavPath)
		return true, nil
	}
	return false, nil
}

// processSegments generates clips for matching segments, using parallel processing for larger batches.
func processSegments(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
	var clips []string
	var errors []string
	if len(segments) <= 2 {
		for _, seg := range segments {
			clipFiles, err := generateClip(samples, sampleRate, outputDir, prefix, basename, seg.StartTime, seg.EndTime, imgSize, color)
			if err != nil {
				errors = append(errors, fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, seg.StartTime, seg.EndTime, err))
				continue
			}
			clips = append(clips, clipFiles...)
		}
	} else {
		clips, errors = processSegmentsParallel(segments, dataPath, samples, sampleRate, outputDir, prefix, basename, imgSize, color)
	}
	return clips, errors
}

// processSegmentsParallel generates clips for segments using worker goroutines.
func processSegmentsParallel(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
	type segResult struct {
		clips []string
		err   string
	}
	workers := min(runtime.NumCPU(), len(segments))
	jobs := make(chan *utils.Segment, len(segments))
	results := make(chan segResult, len(segments))
	var wg sync.WaitGroup
	for range workers {
		wg.Go(func() {
			for seg := range jobs {
				clipFiles, err := generateClip(samples, sampleRate, outputDir, prefix, basename, seg.StartTime, seg.EndTime, imgSize, color)
				if err != nil {
					results <- segResult{err: fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, seg.StartTime, seg.EndTime, err)}
				} else {
					results <- segResult{clips: clipFiles}
				}
			}
		})
	}
	for _, seg := range segments {
		jobs <- seg
	}
	close(jobs)
	go func() {
		wg.Wait()
		close(results)
	}()
	var clips []string
	var errors []string
	for r := range results {
		if r.err != "" {
			errors = append(errors, r.err)
		} else {
			clips = append(clips, r.clips...)
		}
	}
	return clips, errors
}

// generateClip generates PNG and WAV files for a segment
func generateClip(samples []float64, sampleRate int, outputDir, prefix, basename string, startTime, endTime float64, imgSize int, color bool) ([]string, error) {
	var files []string

	// Calculate integer times for filename
	startInt := int(math.Floor(startTime))
	endInt := int(math.Ceil(endTime))

	// Build base filename
	baseName := fmt.Sprintf("%s_%s_%d_%d", prefix, basename, startInt, endInt)
	wavPath := filepath.Join(outputDir, baseName+".wav")

	// Extract segment samples
	segSamples := utils.ExtractSegmentSamples(samples, sampleRate, startTime, endTime)
	if len(segSamples) == 0 {
		return nil, fmt.Errorf("no samples in segment")
	}

	// Determine output sample rate (downsample if > 16kHz)
	outputSampleRate := sampleRate
	if sampleRate > utils.DefaultMaxSampleRate {
		segSamples = utils.ResampleRate(segSamples, sampleRate, utils.DefaultMaxSampleRate)
		outputSampleRate = utils.DefaultMaxSampleRate
	}

	pngPath := filepath.Join(outputDir, baseName+".png")
	spectSampleRate := outputSampleRate
	config := utils.DefaultSpectrogramConfig(spectSampleRate)
	spectrogram := utils.GenerateSpectrogram(segSamples, config)
	if spectrogram == nil {
		return nil, fmt.Errorf("failed to generate spectrogram")
	}

	// Create image (grayscale or color)
	var img image.Image
	if color {
		colorData := utils.ApplyL4Colormap(spectrogram)
		img = utils.CreateRGBImage(colorData)
	} else {
		img = utils.CreateGrayscaleImage(spectrogram)
	}
	if img == nil {
		return nil, fmt.Errorf("failed to create image")
	}
	resized := utils.ResizeImage(img, imgSize, imgSize)

	// Write PNG (O_EXCL fails atomically if file exists)
	pngFile, err := os.OpenFile(pngPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		if os.IsExist(err) {
			return nil, fmt.Errorf("file already exists: %s", pngPath)
		}
		return nil, fmt.Errorf("failed to create PNG: %w", err)
	}
	if err := utils.WritePNG(resized, pngFile); err != nil {
		_ = pngFile.Close()
		return nil, fmt.Errorf("failed to write PNG: %w", err)
	}
	if err := pngFile.Close(); err != nil {
		return nil, fmt.Errorf("failed to close PNG: %w", err)
	}
	files = append(files, pngPath)

	// Write WAV
	if err := utils.WriteWAVFile(wavPath, segSamples, outputSampleRate); err != nil {
		return nil, fmt.Errorf("failed to write WAV: %w", err)
	}
	files = append(files, wavPath)

	return files, nil
}
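The PNG write above relies on O_EXCL to make "create only if absent" atomic between concurrent workers. A minimal standalone sketch of the same pattern; writeIfAbsent and the demo filename are illustrative, not part of the package:

// Sketch of the O_EXCL create-then-write pattern used by generateClip.
// writeIfAbsent is a hypothetical helper for illustration only.
package main

import (
	"fmt"
	"os"
)

func writeIfAbsent(path string, data []byte) error {
	// O_EXCL makes creation fail if the file already exists, so two
	// concurrent workers can never both write the same output file.
	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		if os.IsExist(err) {
			return fmt.Errorf("file already exists: %s", path)
		}
		return err
	}
	if _, err := f.Write(data); err != nil {
		_ = f.Close()
		return err
	}
	return f.Close()
}

func main() {
	if err := writeIfAbsent("clip.png", []byte("demo")); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}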
package calls

import (
	"testing"

	"skraak/utils"
)

// NewClassifyState builds a ClassifyState directly from in-memory data files,
// mirroring the segment filtering that LoadDataFiles performs (test helper).
func NewClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile) *ClassifyState {
	hasFilter := config.Filter != "" || config.Species != "" || config.Certainty >= 0
	cached := make([][]*utils.Segment, len(dataFiles))
	for i, df := range dataFiles {
		if !hasFilter {
			cached[i] = df.Segments
		} else {
			for _, seg := range df.Segments {
				if seg.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
					cached[i] = append(cached[i], seg)
				}
			}
		}
	}
	total := 0
	for _, segs := range cached {
		total += len(segs)
	}
	return &ClassifyState{
		Config:       config,
		DataFiles:    dataFiles,
		filteredSegs: cached,
		totalSegs:    total,
	}
}

func TestParseKeyBuffer(t *testing.T) {
	bindings := []KeyBinding{
		{Key: "k", Species: "Kiwi"},
		{Key: "d", Species: "Kiwi", CallType: "Duet"},
		{Key: "n", Species: "Don't Know"},
		{Key: "p", Species: "Morepork"},
	}
	state := NewClassifyState(ClassifyConfig{Bindings: bindings, Certainty: -1}, nil)

	tests := []struct {
		key     string
		want    *BindingResult
		wantNil bool
	}{
		{"k", &BindingResult{Species: "Kiwi"}, false},
		{"d", &BindingResult{Species: "Kiwi", CallType: "Duet"}, false},
		{"n", &BindingResult{Species: "Don't Know"}, false},
		{"p", &BindingResult{Species: "Morepork"}, false},
		{"x", nil, true}, // unknown key
	}
	for _, tt := range tests {
		got := state.ParseKeyBuffer(tt.key)
		if tt.wantNil {
			if got != nil {
				t.Errorf("ParseKeyBuffer(%q) = %v, want nil", tt.key, got)
			}
		} else {
			if got == nil {
				t.Errorf("ParseKeyBuffer(%q) = nil, want %+v", tt.key, tt.want)
				continue
			}
			if got.Species != tt.want.Species {
				t.Errorf("ParseKeyBuffer(%q).Species = %q, want %q", tt.key, got.Species, tt.want.Species)
			}
			if got.CallType != tt.want.CallType {
				t.Errorf("ParseKeyBuffer(%q).CallType = %q, want %q", tt.key, got.CallType, tt.want.CallType)
			}
		}
	}
}

func TestApplyBinding(t *testing.T) {
	bindings := []KeyBinding{
		{Key: "k", Species: "Kiwi"},
		{Key: "n", Species: "Don't Know"},
		{Key: "d", Species: "Kiwi", CallType: "Duet"},
	}
	df := &utils.DataFile{
		Meta: &utils.DataMeta{},
		Segments: []*utils.Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*utils.Label{
					{Species: "Unknown", Certainty: 50, Filter: "test-filter", CallType: "OldType"},
				},
			},
		},
	}
	state := NewClassifyState(ClassifyConfig{
		Filter:    "test-filter",
		Reviewer:  "David",
		Bindings:  bindings,
		Certainty: -1,
	}, []*utils.DataFile{df})

	// Apply "k" = Kiwi (no calltype, should remove existing calltype)
	result := &BindingResult{Species: "Kiwi"}
	state.ApplyBinding(result)

	// Check label was updated
	if len(df.Segments[0].Labels) != 1 {
		t.Errorf("expected 1 label, got %d", len(df.Segments[0].Labels))
	}
	if df.Segments[0].Labels[0].Species != "Kiwi" {
		t.Errorf("expected Species=Kiwi, got %s", df.Segments[0].Labels[0].Species)
	}
	if df.Segments[0].Labels[0].Certainty != 100 {
		t.Errorf("expected Certainty=100, got %d", df.Segments[0].Labels[0].Certainty)
	}
	if df.Segments[0].Labels[0].CallType != "" {
		t.Errorf("expected CallType='', got %s (should be removed)", df.Segments[0].Labels[0].CallType)
	}
	if df.Meta.Reviewer != "David" {
		t.Errorf("expected Reviewer=David, got %s", df.Meta.Reviewer)
	}

	// Apply "d" = Kiwi/Duet (should set calltype)
	result = &BindingResult{Species: "Kiwi", CallType: "Duet"}
	state.ApplyBinding(result)
	if df.Segments[0].Labels[0].CallType != "Duet" {
		t.Errorf("expected CallType=Duet, got %s", df.Segments[0].Labels[0].CallType)
	}

	// Apply "n" = Don't Know (certainty should be 0)
	result = &BindingResult{Species: "Don't Know"}
	state.ApplyBinding(result)
	if df.Segments[0].Labels[0].Species != "Don't Know" {
		t.Errorf("expected Species=Don't Know, got %s", df.Segments[0].Labels[0].Species)
	}
	if df.Segments[0].Labels[0].Certainty != 0 {
		t.Errorf("expected Certainty=0 for Don't Know, got %d", df.Segments[0].Labels[0].Certainty)
	}
}

func TestApplyBindingCallTypeRemoval(t *testing.T) {
	bindings := []KeyBinding{
		{Key: "k", Species: "Kiwi"}, // no calltype
	}
	df := &utils.DataFile{
		Meta: &utils.DataMeta{},
		Segments: []*utils.Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*utils.Label{
					{Species: "Kiwi", Certainty: 100, Filter: "test-filter", CallType: "Male"},
				},
			},
		},
	}
	state := NewClassifyState(ClassifyConfig{
		Filter:    "test-filter",
		Reviewer:  "David",
		Bindings:  bindings,
		Certainty: -1,
	}, []*utils.DataFile{df})

	// Apply "k" = Kiwi (should remove Male calltype)
	result := &BindingResult{Species: "Kiwi"}
	state.ApplyBinding(result)
	if df.Segments[0].Labels[0].CallType != "" {
		t.Errorf("expected CallType='', got %s (should be removed)", df.Segments[0].Labels[0].CallType)
	}
}

func TestConfirmLabelDontKnow(t *testing.T) {
	df := &utils.DataFile{
		Meta: &utils.DataMeta{},
		Segments: []*utils.Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*utils.Label{
					{Species: "Don't Know", Certainty: 0, Filter: "test-filter"},
				},
			},
		},
	}
	state := NewClassifyState(ClassifyConfig{
		Filter:    "test-filter",
		Reviewer:  "David",
		Certainty: -1,
	}, []*utils.DataFile{df})

	// ConfirmLabel on Don't Know should be a no-op
	if state.ConfirmLabel() {
		t.Error("ConfirmLabel() should return false for Don't Know (certainty=0)")
	}
	label := df.Segments[0].Labels[0]
	if label.Species != "Don't Know" {
		t.Errorf("Species should remain Don't Know, got %s", label.Species)
	}
	if label.Certainty != 0 {
		t.Errorf("Certainty should remain 0, got %d", label.Certainty)
	}
	if state.Dirty {
		t.Error("State should not be dirty after confirming Don't Know")
	}
}
package calls

import (
	"os"
	"path/filepath"
	"testing"
)

// writeDataFileContent creates a .data file in dir with the given raw content.
func writeDataFileContent(t *testing.T, dir, name, content string) {
	t.Helper()
	if err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0644); err != nil {
		t.Fatal(err)
	}
}

// mustLoadDataFiles is a test helper that calls LoadDataFiles and fatals on error.
func mustLoadDataFiles(t *testing.T, config ClassifyConfig) *ClassifyState {
	t.Helper()
	state, err := LoadDataFiles(config)
	if err != nil {
		t.Fatal(err)
	}
	return state
}

// assertFileSegCounts checks file count and total segment count match expected values.
func assertFileSegCounts(t *testing.T, state *ClassifyState, wantFiles, wantSegs int, label string) {
	t.Helper()
	if len(state.DataFiles) != wantFiles {
		t.Errorf("%s: expected %d files, got %d", label, wantFiles, len(state.DataFiles))
	}
	if state.TotalSegments() != wantSegs {
		t.Errorf("%s: expected %d segments total, got %d", label, wantSegs, state.TotalSegments())
	}
}

const (
	kiwiSeg   = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]]]`
	tomtitSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
)

func TestLoadDataFilesFiltersFilesWithNoMatchingSegments(t *testing.T) {
	tempDir := t.TempDir()
	writeDataFileContent(t, tempDir, "file1.data", kiwiSeg)
	writeDataFileContent(t, tempDir, "file2.data", tomtitSeg)
	writeDataFileContent(t, tempDir, "file3.data", kiwiSeg)

	t.Run("no_filter", func(t *testing.T) {
		state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Certainty: -1})
		assertFileSegCounts(t, state, 3, 3, "No filter")
	})
	t.Run("species_kiwi", func(t *testing.T) {
		state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})
		assertFileSegCounts(t, state, 2, 2, "Species=Kiwi")
	})
	t.Run("species_tomtit", func(t *testing.T) {
		state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Tomtit", Certainty: -1})
		assertFileSegCounts(t, state, 1, 1, "Species=Tomtit")
	})
	t.Run("species_nonexistent", func(t *testing.T) {
		state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "NonExistent", Certainty: -1})
		assertFileSegCounts(t, state, 0, 0, "Species=NonExistent")
	})
}

func TestLoadDataFilesWithMixedSegments(t *testing.T) {
	tempDir := t.TempDir()
	file := `[{"Operator": "test"},[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],[10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]],[20, 30, 100, 1000, [{"species": "Kiwi", "certainty": 95}]]]`
	writeDataFileContent(t, tempDir, "mixed.data", file)
	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})
	if len(state.DataFiles) != 1 {
		t.Errorf("Expected 1 file, got %d", len(state.DataFiles))
	}
	if state.TotalSegments() != 2 {
		t.Errorf("Species=Kiwi: expected 2 segments, got %d", state.TotalSegments())
	}
	// The DataFile should still have all 3 segments internally
	// but cached filtered segments should return only the Kiwi ones
	if len(state.DataFiles[0].Segments) != 3 {
		t.Errorf("DataFile should have 3 segments internally, got %d", len(state.DataFiles[0].Segments))
	}
	// TotalSegments uses cached filtered segments
	if state.TotalSegments() != 2 {
		t.Errorf("TotalSegments should return 2 Kiwi segments, got %d", state.TotalSegments())
	}
}

// Test that the original DataFile segments are not modified (immutable filtering)
func TestFilteringDoesNotModifyOriginalSegments(t *testing.T) {
	tempDir := t.TempDir()
	file := `[{"Operator": "test"},[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],[10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]]]`
	writeDataFileContent(t, tempDir, "test.data", file)
	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})

	// Original segments should be untouched
	originalSegments := state.DataFiles[0].Segments
	if len(originalSegments) != 2 {
		t.Errorf("Original should have 2 segments, got %d", len(originalSegments))
	}
	// Verify all original segments are preserved
	species := []string{}
	for _, seg := range originalSegments {
		if len(seg.Labels) > 0 {
			species = append(species, seg.Labels[0].Species)
		}
	}
	if len(species) != 2 || species[0] != "Kiwi" || species[1] != "Tomtit" {
		t.Errorf("Original segments should have both species, got %v", species)
	}
}

func TestLoadDataFilesCertaintyPruning(t *testing.T) {
	tempDir := t.TempDir()
	writeDataFileContent(t, tempDir, "file1.data", `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`)
	writeDataFileContent(t, tempDir, "file2.data", `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 100}]]]`)
	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Certainty: 100})
	assertFileSegCounts(t, state, 1, 1, "Certainty=100")

	// CurrentSegment should work (not nil) because file1 was pruned
	seg := state.CurrentSegment()
	if seg == nil {
		t.Error("CurrentSegment should not be nil after pruning")
	}
}
package calls

import (
	"math/rand"
	"testing"

	"skraak/utils"
)

func TestTotalSegmentsRespectsFilters(t *testing.T) {
	// Create test data files with different species and filters
	df1 := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			{
				StartTime: 0,
				EndTime:   10,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0"},
				},
			},
			{
				StartTime: 10,
				EndTime:   20,
				Labels: []*utils.Label{
					{Species: "Tomtit", Filter: "model-1.0"},
				},
			},
		},
	}
	df2 := &utils.DataFile{
		FilePath: "/test/file2.data",
		Segments: []*utils.Segment{
			{
				StartTime: 0,
				EndTime:   10,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0"},
				},
			},
		},
	}

	// Test 1: No filters - should count all segments (3)
	state1 := NewClassifyState(ClassifyConfig{Certainty: -1}, []*utils.DataFile{df1, df2})
	if got := state1.TotalSegments(); got != 3 {
		t.Errorf("No filters: expected 3 segments, got %d", got)
	}
	// Test 2: Filter by species "Kiwi" - should count only Kiwi segments (2)
	state2 := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df1, df2})
	if got := state2.TotalSegments(); got != 2 {
		t.Errorf("Species=Kiwi: expected 2 segments, got %d", got)
	}
	// Test 3: Filter by species "Tomtit" - should count only Tomtit segments (1)
	state3 := NewClassifyState(ClassifyConfig{Species: "Tomtit", Certainty: -1}, []*utils.DataFile{df1, df2})
	if got := state3.TotalSegments(); got != 1 {
		t.Errorf("Species=Tomtit: expected 1 segment, got %d", got)
	}
	// Test 4: Filter by filter name "model-1.0" - should count all segments (3)
	state4 := NewClassifyState(ClassifyConfig{Filter: "model-1.0", Certainty: -1}, []*utils.DataFile{df1, df2})
	if got := state4.TotalSegments(); got != 3 {
		t.Errorf("Filter=model-1.0: expected 3 segments, got %d", got)
	}
	// Test 5: Filter by non-existent species - should count 0
	state5 := NewClassifyState(ClassifyConfig{Species: "NonExistent", Certainty: -1}, []*utils.DataFile{df1, df2})
	if got := state5.TotalSegments(); got != 0 {
		t.Errorf("Species=NonExistent: expected 0 segments, got %d", got)
	}
	// Test 6: Combined filter + species
	df3 := &utils.DataFile{
		FilePath: "/test/file3.data",
		Segments: []*utils.Segment{
			{
				StartTime: 0,
				EndTime:   10,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0", CallType: "Duet"},
				},
			},
			{
				StartTime: 10,
				EndTime:   20,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-2.0", CallType: "Male"},
				},
			},
		},
	}
	state6 := NewClassifyState(ClassifyConfig{Filter: "model-1.0", Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df3})
	if got := state6.TotalSegments(); got != 1 {
		t.Errorf("Filter=model-1.0 + Species=Kiwi: expected 1 segment, got %d", got)
	}
}

func TestCurrentSegmentNumberWithFilters(t *testing.T) {
	// Create test data files
	df1 := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			{
				StartTime: 0,
				EndTime:   10,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0"},
				},
			},
			{
				StartTime: 10,
				EndTime:   20,
				Labels: []*utils.Label{
					{Species: "Tomtit", Filter: "model-1.0"},
				},
			},
		},
	}
	df2 := &utils.DataFile{
		FilePath: "/test/file2.data",
		Segments: []*utils.Segment{
			{
				StartTime: 0,
				EndTime:   10,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0"},
				},
			},
		},
	}

	// Test: Filter by species "Kiwi", at file 2, segment 0
	// Should report current segment as 2 (first Kiwi in df1 + first Kiwi in df2)
	state := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df1, df2})
	state.FileIdx = 1 // at df2
	state.SegmentIdx = 0
	if got := state.CurrentSegmentNumber(); got != 2 {
		t.Errorf("Species=Kiwi, at file 2, seg 0: expected current segment 2, got %d", got)
	}
}

func TestCertaintyFiltering(t *testing.T) {
	// Create test data files with different certainty levels
	df := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			{
				StartTime: 0,
				EndTime:   10,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0", Certainty: 70},
				},
			},
			{
				StartTime: 10,
				EndTime:   20,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0", Certainty: 100},
				},
			},
			{
				StartTime: 20,
				EndTime:   30,
				Labels: []*utils.Label{
					{Species: "Tomtit", Filter: "model-1.0", Certainty: 70},
				},
			},
		},
	}
	// Test 1: Filter by certainty 70 - should get 2 segments
	state1 := NewClassifyState(ClassifyConfig{Certainty: 70}, []*utils.DataFile{df})
	if got := state1.TotalSegments(); got != 2 {
		t.Errorf("Certainty=70: expected 2 segments, got %d", got)
	}
	// Test 2: Filter by certainty 100 - should get 1 segment
	state2 := NewClassifyState(ClassifyConfig{Certainty: 100}, []*utils.DataFile{df})
	if got := state2.TotalSegments(); got != 1 {
		t.Errorf("Certainty=100: expected 1 segment, got %d", got)
	}
	// Test 3: Filter by certainty 0 - should get 0 segments
	state3 := NewClassifyState(ClassifyConfig{Certainty: 0}, []*utils.DataFile{df})
	if got := state3.TotalSegments(); got != 0 {
		t.Errorf("Certainty=0: expected 0 segments, got %d", got)
	}
	// Test 4: Combined species + certainty
	state4 := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: 70}, []*utils.DataFile{df})
	if got := state4.TotalSegments(); got != 1 {
		t.Errorf("Species=Kiwi + Certainty=70: expected 1 segment, got %d", got)
	}
}

func TestSampling(t *testing.T) {
	makeSegs := func(n int) []*utils.Segment {
		s := make([]*utils.Segment, n)
		for i := range s {
			s[i] = &utils.Segment{StartTime: float64(i), EndTime: float64(i + 1)}
		}
		return s
	}
	df1 := &utils.DataFile{FilePath: "/test/f1.data", Segments: makeSegs(6)}
	df2 := &utils.DataFile{FilePath: "/test/f2.data", Segments: makeSegs(4)}
	kept := []*utils.DataFile{df1, df2}
	cached := [][]*utils.Segment{df1.Segments, df2.Segments}
	countTotal := func(c [][]*utils.Segment) int {
		n := 0
		for _, s := range c {
			n += len(s)
		}
		return n
	}

	// 50% of 10 → 5
	k, c := applySampling(kept, cached, 50, rand.New(rand.NewSource(42)))
	if got := countTotal(c); got != 5 {
		t.Errorf("sample 50%%: expected 5, got %d", got)
	}
	// Files must be in original chronological order
	for i := 1; i < len(k); i++ {
		if k[i].FilePath < k[i-1].FilePath {
			t.Errorf("sample 50%%: files out of order at index %d", i)
		}
	}
	// 10% of 10 → 1
	_, c2 := applySampling(kept, cached, 10, rand.New(rand.NewSource(42)))
	if got := countTotal(c2); got != 1 {
		t.Errorf("sample 10%%: expected 1, got %d", got)
	}
	// 1% of 10 → clamp to 1
	_, c3 := applySampling(kept, cached, 1, rand.New(rand.NewSource(42)))
	if got := countTotal(c3); got != 1 {
		t.Errorf("sample 1%%: expected 1 (clamped), got %d", got)
	}
	// 99% of 10 → 9
	_, c4 := applySampling(kept, cached, 99, rand.New(rand.NewSource(42)))
	if got := countTotal(c4); got != 9 {
		t.Errorf("sample 99%%: expected 9, got %d", got)
	}
}

func TestCertaintyPruning(t *testing.T) {
	// Simulate the bug: first file has no matching certainty segments
	df1 := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			{
				StartTime: 0,
				EndTime:   10,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0", Certainty: 70},
				},
			},
		},
	}
	df2 := &utils.DataFile{
		FilePath: "/test/file2.data",
		Segments: []*utils.Segment{
			{
				StartTime: 0,
				EndTime:   10,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0", Certainty: 100},
				},
			},
		},
	}
	// Without pruning (old bug): file1 is first, has no certainty=100 segments
	// CurrentSegment() would return nil even though TotalSegments() > 0
	state := NewClassifyState(ClassifyConfig{Certainty: 100}, []*utils.DataFile{df1, df2})
	// TotalSegments should be 1 (only file2 has certainty 100)
	if got := state.TotalSegments(); got != 1 {
		t.Errorf("Certainty=100: expected 1 segment, got %d", got)
	}
	// CurrentSegment should work if files are properly pruned
	// Note: this test assumes LoadDataFiles does the pruning
	// Here we test the state after manual construction
}

func TestCallTypeNoneFiltering(t *testing.T) {
	// Create test data: Kiwi with calltype, Kiwi without, Tomtit without
	df := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			{
				StartTime: 0,
				EndTime:   10,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0", CallType: "Male"},
				},
			},
			{
				StartTime: 10,
				EndTime:   20,
				Labels: []*utils.Label{
					{Species: "Kiwi", Filter: "model-1.0"}, // no calltype
				},
			},
			{
				StartTime: 20,
				EndTime:   30,
				Labels: []*utils.Label{
					{Species: "Tomtit", Filter: "model-1.0"}, // no calltype, wrong species
				},
			},
		},
	}
	// Test 1: --species Kiwi+_ should match only Kiwi with no calltype (1 segment)
	state1 := NewClassifyState(ClassifyConfig{Species: "Kiwi", CallType: utils.CallTypeNone, Certainty: -1}, []*utils.DataFile{df})
	if got := state1.TotalSegments(); got != 1 {
		t.Errorf("Species=Kiwi+_: expected 1 segment, got %d", got)
	}
	// Test 2: --species Kiwi should still match all Kiwi (2 segments)
	state2 := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df})
	if got := state2.TotalSegments(); got != 2 {
		t.Errorf("Species=Kiwi: expected 2 segments, got %d", got)
	}
	// Test 3: --species Kiwi+Male should still work as before (1 segment)
	state3 := NewClassifyState(ClassifyConfig{Species: "Kiwi", CallType: "Male", Certainty: -1}, []*utils.DataFile{df})
	if got := state3.TotalSegments(); got != 1 {
		t.Errorf("Species=Kiwi+Male: expected 1 segment, got %d", got)
	}
}
package calls

import (
	"fmt"
	"math/rand"
	"os"
	"path/filepath"
	"slices"
	"sort"
	"strings"
	"time"

	"skraak/utils"
)

// KeyBinding maps a key to a species/calltype
type KeyBinding struct {
	Key      string // single char: "k", "n", "p"
	Species  string // "Kiwi", "Don't Know", "Morepork"
	CallType string // "Duet", "Female", "Male" (optional)
}

// ClassifyConfig holds the configuration for classification
type ClassifyConfig struct {
	Folder    string
	File      string
	Filter    string
	Species   string // scope to this species (optional)
	CallType  string // scope to this calltype within species (optional)
	Certainty int    // scope to this certainty value, -1 = no filter (optional)
	Sample    int    // random sample percentage 1-99, -1 = no sampling, 100 = no-op
	Goto      string // goto this file on startup (optional, basename match)
	Reviewer  string
	Color     bool
	ImageSize int // spectrogram display size in pixels (0 = default)
	Sixel     bool
	ITerm     bool
	Bindings  []KeyBinding
	// SecondaryBindings maps a primary binding key to per-species calltype
	// keys. Invoked via Shift+primary-key: the species is labeled without
	// advancing, and the next key is interpreted as a calltype.
	SecondaryBindings map[string]map[string]string
	Night             bool
	Day               bool
	Lat               float64
	Lng               float64
	Timezone          string
}

// ClassifyState holds the current state for TUI
type ClassifyState struct {
	Config            ClassifyConfig
	DataFiles         []*utils.DataFile
	filteredSegs      [][]*utils.Segment // cached at load time, parallel to DataFiles
	totalSegs         int                // pre-computed total segment count
	FileIdx           int
	SegmentIdx        int
	Dirty             bool
	Player            *utils.AudioPlayer
	PlaybackSpeed     float64 // Current playback speed (1.0 = normal, 0.5 = half speed)
	TimeFilteredCount int     // files skipped by --night or --day filter
}

// BindingResult represents parsed key result
type BindingResult struct {
	Species  string
	CallType string // empty string = remove calltype
}

// findDataFilePaths resolves the list of .data file paths from config.
func findDataFilePaths(config ClassifyConfig) ([]string, error) {
	if config.File != "" {
		return []string{config.File}, nil
	}
	paths, err := utils.FindDataFiles(config.Folder)
	if err != nil {
		return nil, fmt.Errorf("find data files: %w", err)
	}
	return paths, nil
}

// filterDataFileSegments applies segment and day/night filters to a single data file.
// Returns the filtered segments and whether the file should be kept.
// If the file is filtered out (no matching segments, or time-of-day), returns nil, false.
func filterDataFileSegments(df *utils.DataFile, config ClassifyConfig) ([]*utils.Segment, bool, int) {
	segs := filterSegmentsByLabel(df.Segments, config)
	if segs == nil {
		return nil, false, 0
	}
	timeFiltered := 0
	if config.Night || config.Day {
		keep, tf := filterByTimeOfDay(df.FilePath, config)
		if !keep {
			return nil, false, tf
		}
	}
	return segs, true, timeFiltered
}

// filterSegmentsByLabel applies label/species/certainty filters, returning matching segments.
// Returns nil if no segments match (caller should skip the file).
func filterSegmentsByLabel(segments []*utils.Segment, config ClassifyConfig) []*utils.Segment {
	hasFilter := config.Filter != "" || config.Species != "" || config.Certainty >= 0
	if !hasFilter {
		return segments
	}
	var segs []*utils.Segment
	for _, seg := range segments {
		if seg.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
			segs = append(segs, seg)
		}
	}
	return segs // nil if empty, caller treats as "skip"
}

// filterByTimeOfDay checks --night/--day time-of-day filter for a .data file.
// Returns (keep, timeFilteredCount).
func filterByTimeOfDay(dataFilePath string, config ClassifyConfig) (bool, int) {
	wavPath := filepath.Clean(strings.TrimSuffix(dataFilePath, ".data"))
	result, err := IsNight(IsNightInput{
		FilePath: wavPath,
		Lat:      config.Lat,
		Lng:      config.Lng,
		Timezone: config.Timezone,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
		return false, 1
	}
	if config.Night && !result.SolarNight {
		return false, 1
	}
	if config.Day && !result.DiurnalActive {
		return false, 1
	}
	return true, 0
}

// LoadDataFiles loads all .data files for classification.
func LoadDataFiles(config ClassifyConfig) (*ClassifyState, error) {
	dataFiles, err := parseAndSortDataFiles(config)
	if err != nil {
		return nil, err
	}
	kept, cachedSegs, timeFiltered := filterDataFiles(dataFiles, config)
	if config.Sample > 0 && config.Sample < 100 {
		rng := rand.New(rand.NewSource(time.Now().UnixNano()))
		kept, cachedSegs = applySampling(kept, cachedSegs, config.Sample, rng)
	}
	return buildClassifyState(config, kept, cachedSegs, timeFiltered)
}

// parseAndSortDataFiles finds, parses, and sorts .data files from the config.
func parseAndSortDataFiles(config ClassifyConfig) ([]*utils.DataFile, error) {
	filePaths, err := findDataFilePaths(config)
	if err != nil {
		return nil, err
	}
	if len(filePaths) == 0 {
		return nil, fmt.Errorf("no .data files found")
	}
	var dataFiles []*utils.DataFile
	for _, path := range filePaths {
		df, err := utils.ParseDataFile(path)
		if err != nil {
			continue
		}
		dataFiles = append(dataFiles, df)
	}
	if len(dataFiles) == 0 {
		return nil, fmt.Errorf("no valid .data files")
	}
	sort.Slice(dataFiles, func(i, j int) bool {
		return dataFiles[i].FilePath < dataFiles[j].FilePath
	})
	return dataFiles, nil
}

// filterDataFiles applies segment filters to each data file, returning kept files and their segments.
func filterDataFiles(dataFiles []*utils.DataFile, config ClassifyConfig) ([]*utils.DataFile, [][]*utils.Segment, int) {
	var kept []*utils.DataFile
	var cachedSegs [][]*utils.Segment
	var timeFiltered int
	for _, df := range dataFiles {
		segs, keep, tf := filterDataFileSegments(df, config)
		timeFiltered += tf
		if !keep {
			continue
		}
		kept = append(kept, df)
		cachedSegs = append(cachedSegs, segs)
	}
	return kept, cachedSegs, timeFiltered
}

// buildClassifyState constructs the ClassifyState, handling --goto file positioning.
func buildClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile, filteredSegs [][]*utils.Segment, timeFiltered int) (*ClassifyState, error) {
	total := 0
	for _, segs := range filteredSegs {
		total += len(segs)
	}
	state := &ClassifyState{
		Config:            config,
		DataFiles:         dataFiles,
		filteredSegs:      filteredSegs,
		totalSegs:         total,
		TimeFilteredCount: timeFiltered,
	}
	if config.Goto == "" {
		return state, nil
	}
	for i, df := range state.DataFiles {
		base := df.FilePath[strings.LastIndex(df.FilePath, "/")+1:]
		if base == config.Goto {
			state.FileIdx = i
			return state, nil
		}
	}
	return nil, fmt.Errorf("goto file not found (or has no matching segments): %s", config.Goto)
}

// applySampling randomly selects sample% of segments from the filtered set.
// The returned files and segments preserve the original chronological order.
func applySampling(kept []*utils.DataFile, cachedSegs [][]*utils.Segment, sample int, rng *rand.Rand) ([]*utils.DataFile, [][]*utils.Segment) {
	flat := make([]struct{ fileIdx, segIdx int }, 0)
	for fi, segs := range cachedSegs {
		for si := range segs {
			flat = append(flat, struct{ fileIdx, segIdx int }{fi, si})
		}
	}
	targetCount := max(len(flat)*sample/100, 1)
	rng.Shuffle(len(flat), func(i, j int) { flat[i], flat[j] = flat[j], flat[i] })
	selected := flat[:targetCount]
	// Restore chronological order before rebuilding
	sort.Slice(selected, func(i, j int) bool {
		if selected[i].fileIdx != selected[j].fileIdx {
			return selected[i].fileIdx < selected[j].fileIdx
		}
		return selected[i].segIdx < selected[j].segIdx
	})
	newCached := make([][]*utils.Segment, len(cachedSegs))
	for _, ref := range selected {
		newCached[ref.fileIdx] = append(newCached[ref.fileIdx], cachedSegs[ref.fileIdx][ref.segIdx])
	}
	var newKept []*utils.DataFile
	var finalCached [][]*utils.Segment
	for i, segs := range newCached {
		if len(segs) > 0 {
			newKept = append(newKept, kept[i])
			finalCached = append(finalCached, segs)
		}
	}
	return newKept, finalCached
}

// FilteredSegs returns the cached filtered segments parallel to DataFiles.
func (s *ClassifyState) FilteredSegs() [][]*utils.Segment {
	return s.filteredSegs
}

// CurrentFile returns the current data file
func (s *ClassifyState) CurrentFile() *utils.DataFile {
	if s.FileIdx >= len(s.DataFiles) {
		return nil
	}
	return s.DataFiles[s.FileIdx]
}

// CurrentSegment returns the current segment
func (s *ClassifyState) CurrentSegment() *utils.Segment {
	if s.FileIdx >= len(s.filteredSegs) {
		return nil
	}
	segs := s.filteredSegs[s.FileIdx]
	if s.SegmentIdx >= len(segs) {
		return nil
	}
	return segs[s.SegmentIdx]
}

// TotalSegments returns total segments to review
func (s *ClassifyState) TotalSegments() int {
	return s.totalSegs
}

// CurrentSegmentNumber returns 1-based segment number
func (s *ClassifyState) CurrentSegmentNumber() int {
	count := 0
	for i := 0; i < s.FileIdx; i++ {
		count += len(s.filteredSegs[i])
	}
	return count + s.SegmentIdx + 1
}

// NextSegment moves to the next segment, returns false if at end
func (s *ClassifyState) NextSegment() bool {
	if s.FileIdx >= len(s.filteredSegs) {
		return false
	}
	segs := s.filteredSegs[s.FileIdx]
	if s.SegmentIdx+1 < len(segs) {
		s.SegmentIdx++
		return true
	}
	// Move to next file
	if s.FileIdx+1 < len(s.DataFiles) {
		s.FileIdx++
		s.SegmentIdx = 0
		return true
	}
	return false
}

// PrevSegment moves to the previous segment, returns false if at start
func (s *ClassifyState) PrevSegment() bool {
	if s.SegmentIdx > 0 {
		s.SegmentIdx--
		return true
	}
	// Move to previous file
	if s.FileIdx > 0 {
		s.FileIdx--
		segs := s.filteredSegs[s.FileIdx]
		s.SegmentIdx = max(len(segs)-1, 0)
		return true
	}
	return false
}

// ParseKeyBuffer parses a single key into binding result
func (s *ClassifyState) ParseKeyBuffer(key string) *BindingResult {
	for _, b := range s.Config.Bindings {
		if b.Key == key {
			return &BindingResult{
				Species:  b.Species,
				CallType: b.CallType,
			}
		}
	}
	return nil
}

// SetComment sets the comment on the current segment's filter label.
// Returns the previous comment (for undo) or empty string if none.
func (s *ClassifyState) SetComment(comment string) string {
	seg := s.CurrentSegment()
	if seg == nil {
		return ""
	}
	df := s.CurrentFile()
	if df == nil {
		return ""
	}
	// Set reviewer
	df.Meta.Reviewer = s.Config.Reviewer
	// Get labels matching filter
	filterLabels := seg.GetFilterLabels(s.Config.Filter)
	var oldComment string
	if len(filterLabels) == 0 {
		// No matching labels, add new one with comment
		label := &utils.Label{
			Species:   "Don't Know",
			Certainty: 0,
			Filter:    s.Config.Filter,
			Comment:   comment,
		}
		seg.Labels = append(seg.Labels, label)
	} else {
		// Set comment on first matching label
		oldComment = filterLabels[0].Comment
		filterLabels[0].Comment = comment
	}
	s.Dirty = true
	return oldComment
}

// GetCurrentComment returns the comment on the current segment's filter label.
func (s *ClassifyState) GetCurrentComment() string {
	seg := s.CurrentSegment()
	if seg == nil {
		return ""
	}
	filterLabels := seg.GetFilterLabels(s.Config.Filter)
	if len(filterLabels) == 0 {
		return ""
	}
	return filterLabels[0].Comment
}

// ApplyBinding applies a binding result to the current segment
func (s *ClassifyState) ApplyBinding(result *BindingResult) {
	seg := s.CurrentSegment()
	if seg == nil {
		return
	}
	df := s.CurrentFile()
	if df == nil {
		return
	}
	// Set reviewer
	df.Meta.Reviewer = s.Config.Reviewer
	// Get labels matching filter
	filterLabels := seg.GetFilterLabels(s.Config.Filter)
	// Determine certainty: 0 for Don't Know, 100 for others
	certainty := 100
	if result.Species == "Don't Know" {
		certainty = 0
	}
	if len(filterLabels) == 0 {
		// No matching labels, add new one
		seg.Labels = append(seg.Labels, &utils.Label{
			Species:   result.Species,
			Certainty: certainty,
			Filter:    s.Config.Filter,
			CallType:  result.CallType,
		})
	} else {
		// Edit first matching label, remove rest
		filterLabels[0].Species = result.Species
		filterLabels[0].Certainty = certainty
		filterLabels[0].CallType = result.CallType // always set (empty = remove)
		// Remove extra matching labels
		if len(filterLabels) > 1 {
			var newLabels []*utils.Label
			for _, l := range seg.Labels {
				keep := !slices.Contains(filterLabels[1:], l)
				if keep {
					newLabels = append(newLabels, l)
				}
			}
			seg.Labels = newLabels
		}
	}
	// Re-sort labels
	sort.Slice(seg.Labels, func(i, j int) bool {
		return seg.Labels[i].Species < seg.Labels[j].Species
	})
	s.Dirty = true
}

// ApplyCallTypeOnly sets the CallType on the current segment's first
// filter-matching label. Used after a Shift+primary keypress labeled the
// species and we now receive the secondary key for the calltype.
// No-op if there is no matching label to update.
func (s *ClassifyState) ApplyCallTypeOnly(callType string) {
	seg := s.CurrentSegment()
	if seg == nil {
		return
	}
	df := s.CurrentFile()
	if df == nil {
		return
	}
	filterLabels := seg.GetFilterLabels(s.Config.Filter)
	if len(filterLabels) == 0 {
		return
	}
	df.Meta.Reviewer = s.Config.Reviewer
	filterLabels[0].CallType = callType
	s.Dirty = true
}

// HasSecondary reports whether the given primary key has any secondary
// (calltype) bindings configured.
func (s *ClassifyState) HasSecondary(primaryKey string) bool {
	return len(s.Config.SecondaryBindings[primaryKey]) > 0
}

// ConfirmLabel upgrades the current segment's existing filter label certainty
// to 100. Returns true if a write is needed (label existed and was below 100).
// Returns false for Don't Know (certainty=0) — confirming a Don't Know is a no-op;
// the caller should just advance to the next segment.
func (s *ClassifyState) ConfirmLabel() bool {
	seg := s.CurrentSegment()
	if seg == nil {
		return false
	}
	filterLabels := seg.GetFilterLabels(s.Config.Filter)
	if len(filterLabels) == 0 {
		return false
	}
	if filterLabels[0].Certainty == 0 {
		return false
	}
	if filterLabels[0].Certainty == 100 {
		return false
	}
	df := s.CurrentFile()
	if df == nil {
		return false
	}
	df.Meta.Reviewer = s.Config.Reviewer
	filterLabels[0].Certainty = 100
	s.Dirty = true
	return true
}

// Save saves the current file
func (s *ClassifyState) Save() error {
	df := s.CurrentFile()
	if df == nil {
		return nil
	}
	if !s.Dirty {
		return nil
	}
	err := df.Write(df.FilePath)
	if err != nil {
		return err
	}
	s.Dirty = false
	return nil
}

// getFilterLabel returns the label matching the current filter, or first label if no filter.
func (s *ClassifyState) getFilterLabel(seg *utils.Segment) *utils.Label {
	if s.Config.Filter == "" {
		if len(seg.Labels) > 0 {
			return seg.Labels[0]
		}
		return nil
	}
	for _, label := range seg.Labels {
		if label.Filter == s.Config.Filter {
			return label
		}
	}
	return nil
}

// getOrCreateFilterLabel gets existing label or creates new one for the current filter.
func (s *ClassifyState) getOrCreateFilterLabel(seg *utils.Segment) *utils.Label {
	label := s.getFilterLabel(seg)
	if label != nil {
		return label
	}
	// Create new label
	label = &utils.Label{
		Species:   "Don't Know",
		Certainty: 0,
		Filter:    s.Config.Filter,
	}
	seg.Labels = append(seg.Labels, label)
	s.Dirty = true
	return label
}

// HasBookmark returns true if current segment has a bookmark on the filter label.
func (s *ClassifyState) HasBookmark() bool {
	seg := s.CurrentSegment()
	if seg == nil {
		return false
	}
	label := s.getFilterLabel(seg)
	return label != nil && label.Bookmark
}

// ToggleBookmark toggles the bookmark on the current segment's filter label.
func (s *ClassifyState) ToggleBookmark() {
	seg := s.CurrentSegment()
	if seg == nil {
		return
	}
	df := s.CurrentFile()
	if df == nil {
		return
	}
	// Set reviewer
	df.Meta.Reviewer = s.Config.Reviewer
	label := s.getOrCreateFilterLabel(seg)
	label.Bookmark = !label.Bookmark
	s.Dirty = true
}

// NextBookmark navigates to the next bookmark, wrapping around if needed.
// Returns false if no bookmarks found (back at start position).
func (s *ClassifyState) NextBookmark() bool {
	startFile := s.FileIdx
	startSeg := s.SegmentIdx
	first := true
	for {
		// Advance to next segment
		if !s.NextSegment() {
			// Wrap to start of folder
			s.FileIdx = 0
			s.SegmentIdx = 0
		}
		// Check if we've looped back to start
		if !first && s.FileIdx == startFile && s.SegmentIdx == startSeg {
			return false // full circle, no bookmark found
		}
		first = false
		// Check if current segment has bookmark
		if s.hasFilterBookmark() {
			return true
		}
	}
}

// PrevBookmark navigates to the previous bookmark, wrapping around if needed.
// Returns false if no bookmarks found (back at start position).
func (s *ClassifyState) PrevBookmark() bool {
	startFile := s.FileIdx
	startSeg := s.SegmentIdx
	first := true
	for {
		// Move to previous segment
		if !s.PrevSegment() {
			// Wrap to end of folder
			s.FileIdx = len(s.DataFiles) - 1
			segs := s.filteredSegs[s.FileIdx]
			s.SegmentIdx = max(len(segs)-1, 0)
		}
		// Check if we've looped back to start
		if !first && s.FileIdx == startFile && s.SegmentIdx == startSeg {
			return false // full circle, no bookmark found
		}
		first = false
		// Check if current segment has bookmark
		if s.hasFilterBookmark() {
			return true
		}
	}
}

// hasFilterBookmark checks if current segment has bookmark on filter-matching label.
func (s *ClassifyState) hasFilterBookmark() bool {
	seg := s.CurrentSegment()
	if seg == nil {
		return false
	}
	label := s.getFilterLabel(seg)
	return label != nil && label.Bookmark
}

// FormatLabels formats labels for display
func FormatLabels(labels []*utils.Label, filter string) string {
	var parts []string
	for _, l := range labels {
		if filter != "" && l.Filter != filter {
			continue
		}
		part := l.Species
		if l.CallType != "" {
			part += "/" + l.CallType
		}
		part += fmt.Sprintf(" (%d%%)", l.Certainty)
		if l.Filter != "" {
			part += " [" + l.Filter + "]"
		}
		if l.Comment != "" {
			part += fmt.Sprintf(" \"%s\"", l.Comment)
		}
		parts = append(parts, part)
	}
	return strings.Join(parts, ", ")
}
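For orientation, a hedged sketch of how the navigation API above composes. This is an illustrative helper in the same package, not part of the codebase:

// Sketch (illustrative only): visit every filtered segment once, printing
// its 1-based position and formatted labels.
package calls

import "fmt"

func walkSegments(state *ClassifyState) {
	for {
		seg := state.CurrentSegment()
		if seg == nil {
			return // empty state or index past the end
		}
		fmt.Printf("%d/%d  %s\n",
			state.CurrentSegmentNumber(), state.TotalSegments(),
			FormatLabels(seg.Labels, state.Config.Filter))
		if !state.NextSegment() {
			return // last segment of the last file
		}
	}
}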
package calls

// AviaNZMeta is the metadata element in a .data file
type AviaNZMeta struct {
	Operator string  `json:"Operator"`
	Reviewer *string `json:"Reviewer,omitempty"`
	Duration float64 `json:"Duration"`
}

// AviaNZLabel represents a species label in a segment
type AviaNZLabel struct {
	Species   string `json:"species"`
	Certainty int    `json:"certainty"`
	Filter    string `json:"filter"`
}

// AviaNZSegment represents a detection segment [start, end, freq_low, freq_high, labels]
type AviaNZSegment [5]any
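Because AviaNZSegment is a [5]any, decoding rides on encoding/json's defaults for untyped targets: numbers become float64 and the label list becomes []any. A self-contained sketch, assuming the same on-disk segment shape the tests above use; the real parsing lives in utils.ParseDataFile:

// Sketch: decoding one [start, end, freq_low, freq_high, labels] segment.
package main

import (
	"encoding/json"
	"fmt"
)

type AviaNZSegment [5]any

func main() {
	raw := `[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]]`
	var seg AviaNZSegment
	if err := json.Unmarshal([]byte(raw), &seg); err != nil {
		panic(err)
	}
	// With an `any` target, JSON numbers decode as float64.
	start, end := seg[0].(float64), seg[1].(float64)
	fmt.Printf("segment %.0f-%.0fs, labels: %v\n", start, end, seg[4])
}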
package db

// ResolveDBPath returns the inputPath if non-empty, otherwise returns the
// fallback path. This is used by tools that accept an explicit DBPath in
// their Input struct but need a default when not provided.
func ResolveDBPath(inputPath, fallback string) string {
	if inputPath != "" {
		return inputPath
	}
	return fallback
}
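A hedged usage sketch; the input type and default constant are illustrative names, not the real tool's:

// Sketch: how a tool's entry point might resolve its database path.
// ExportInput and defaultDBPath are hypothetical names for illustration.
package db

const defaultDBPath = "skraak.db" // assumed default, illustration only

type ExportInput struct{ DBPath string }

func resolveExportDB(input ExportInput) string {
	// An explicit Input.DBPath wins; otherwise fall back to the default.
	return ResolveDBPath(input.DBPath, defaultDBPath)
}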
tools/*.go → CLI tools (one file per tool, defines input/output types)
tools/*.go → CLI tools: sql, export, cluster, dataset, location, pattern, time, prepend
tools/calls/ → Call processing (filesystem .data/WAV, NO database access)
tools/import/ → Import operations (bulk, file, files, segments, unstructured)
## [2026-05-12] Stream 7: tools/ package split + SetDBPath removal

Split tools/ into three packages to improve navigation and reduce coupling:

### tools/calls/ (13 source + 11 test + 3 utility files, 4563 lines)
- All calls_* processing — purely filesystem-based, NO database access
- avianz_types.go, parallel_aggregate.go, isnight.go
- Package name: `calls` (import: `skraak/tools/calls`)

### tools/import/ (5 source + 1 test file, 2078 lines)
- import_file, import_files, import_segments, import_unstructured, bulk_file_import
- Package name: `imp` (import: `imp "skraak/tools/import"`)
  (`import` is a Go keyword, so `imp` is used as the package identifier; a short import sketch follows this section)

### tools/ (8 source + 4 test files, remaining ~1700 lines)
- sql, export, cluster, dataset, location, pattern, time, prepend
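The `imp` alias in practice, as a minimal sketch (BulkFileImportInput is a hypothetical exported symbol, referenced only so the aliased import is used):

// Sketch: importing tools/import under the imp alias, since `import`
// cannot be used as a package identifier.
package main

import (
	imp "skraak/tools/import"
)

// Hypothetical type name, for illustration only.
var _ imp.BulkFileImportInput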
### SetDBPath removal
- Removed global `var dbPath string` and `SetDBPath()` from tools/sql.go
- All callers already pass `Input.DBPath` — the global was redundant
- Test files updated: `SetDBPath(testDB)` → `DBPath: testDB` in Input structs
- Added `db.ResolveDBPath()` helper for the resolveDBPath pattern

### depguard updates
- New rules for tools/calls/ and tools/import/ packages
- tui/ may import tools/calls but not tools
- tools/ may not import sub-packages
- tools/calls/ and tools/import/ may not import parent tools/ package

### Cross-boundary dependency resolution
- `resolveDBPath()` → each package calls `db.ResolveDBPath()` directly
- `calls_clip_bench_test.go` path fix: `../audio/` → `../../audio/`
- No unexported symbols cross package boundaries (verified by analysis)
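Illustrative before/after for the test-file change above, assuming the sql tool's function and input type are named Sql and SqlInput (the actual names may differ):

package tools

import (
	"path/filepath"
	"testing"
)

// Sketch of the test migration; Sql and SqlInput are assumed names.
func TestSqlPathSketch(t *testing.T) {
	testDB := filepath.Join(t.TempDir(), "test.db")
	// Before (removed): SetDBPath(testDB) followed by Sql(SqlInput{Query: q})
	// After: the DB path travels in the input struct.
	if _, err := Sql(SqlInput{Query: "SELECT 1", DBPath: testDB}); err != nil {
		t.Fatal(err)
	}
}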
- pkg: "skraak/tools$"desc: "tui must import from tools/calls, not tools"calls:files:- "**/tools/calls/*.go"deny:- pkg: "skraak/cmd"desc: "tools/calls must not import cmd"- pkg: "skraak/tools"desc: "tools/calls must not import parent package"- pkg: "skraak/tui"desc: "tools/calls must not import tui"import:files:- "**/tools/import/*.go"deny:- pkg: "skraak/cmd"desc: "tools/import must not import cmd"- pkg: "skraak/tools"desc: "tools/import must not import parent package"- pkg: "skraak/tui"desc: "tools/import must not import tui"