big tidy up of tools/

quietlight

May 11, 2026, 8:36 PM

3DVPQOKB6BX63XSBIYYCPWBL2RBG3LXZS3XPQBANJP2FWVRAOVZQC

Dependencies

[2] JR46EPFZ 2 cyclo fixes
[3] 2Y5U3QPU added gosymdb
[4] IFLKNMMP ck 1
[5] 7Z6JBD3R ck 6+
[6] LBWQJEDH minor refactor and more tests for utils/
[7] LQLC7S3A trying gemini: Inconsistent Standards in @utils/ refactoring
[8] 54GPBNIX added +_ for tui to select segments with no calltype
[9] YUIQQPXY removed --wav-only from calls clip cmd
[10] SMWSHUOW cyclo over 15
[11] GPQSOVBP cyclo complexity over 25
[12] JAT3DXOL cyclo over 15
[13] NS4TDPLN cyclomatic complexity
[14] I4CMOMXF dot files
[15] KLUEQ6X5 cyclo 21+
[16] T2WZBTVF cyclo 22
[17] YE6BZJUK tidy up lat lng timezone api for calls clip cmd
[18] RUVJ3V4N cyclo to 14 now
[19] VYNOHQJW tidied up CLAUDE.md
[20] GE3VNRXL ck 2
[21] 2P27XV3D fixed cyclo over 30
[22] AVQ66WO4 tools/ refactor
[23] KZKLAINJ run out of space on nest, cleaned out
[24] DD3LCTLZ tidy up lat lng timezone api for calls classify and push certainty
[25] WKQ7LFTP refactor of utils/
[26] GVOVKH5R more cyclo refactoring
[27] HYCZTLSZ fixed tests with cyclo over 15
[28] VNFPBXF7 moved dep tests to golangci-lint
[29] ZOSYO3IB ck 3
[30] QFPEKXL5 ck 6
[31] JZRF7OBJ refactor to get db imports out of utils, but still have failing tests, may need updating

Change contents

replacement in tui/classify.go at line 15

[6.227304]→[6.227304:227320](∅→∅)

"skraak/tools"

[6.227304]

[6.227320]

"skraak/tools/calls"
replacement in tui/classify.go at line 96

[6.229112]→[6.229112:229147](∅→∅)

state *tools.ClassifyState

[6.229112]

[6.229147]

state *calls.ClassifyState
replacement in tui/classify.go at line 121

[6.229957]→[6.229957:230002](∅→∅)

func New(state *tools.ClassifyState) Model {

[6.229957]

[6.230002]

func New(state *calls.ClassifyState) Model {
replacement in tui/classify.go at line 124

[6.230116]→[6.230116:230180](∅→∅)

sorted := make([]tools.KeyBinding, len(state.Config.Bindings))

[6.230116]

[6.230180]

sorted := make([]calls.KeyBinding, len(state.Config.Bindings))
replacement in tui/classify.go at line 369

[6.13046]→[6.13046:13119](∅→∅)

m.state.ApplyBinding(&tools.BindingResult{Species: result.Species})

[6.13046]

[6.13119]

m.state.ApplyBinding(&calls.BindingResult{Species: result.Species})
replacement in tui/classify.go at line 550

[6.239778]→[6.239778:239843](∅→∅)

func saveClip(state *tools.ClassifyState, prefix string) error {

[6.239778]

[6.239843]

func saveClip(state *calls.ClassifyState, prefix string) error {
replacement in tui/classify.go at line 666

[6.242744]→[6.242744:242827](∅→∅)

func playCurrentSegmentAtSpeed(state *tools.ClassifyState, speed float64) string {

[6.242744]

[6.242827]

func playCurrentSegmentAtSpeed(state *calls.ClassifyState, speed float64) string {
replacement in tui/classify.go at line 800

[6.245750]→[6.1110:1204](∅→∅)

fmt.Fprintf(b, " • %s\n", tools.FormatLabels([]*utils.Label{l}, m.state.Config.Filter))

[6.245750]

[6.245845]

fmt.Fprintf(b, " • %s\n", calls.FormatLabels([]*utils.Label{l}, m.state.Config.Filter))
replacement in tui/classify.go at line 832

[6.247228]→[6.247228:247337](∅→∅)

func generateSpectrogramImage(state *tools.ClassifyState, dataPath string, seg *utils.Segment) image.Image {

[6.247228]

[6.247337]

func generateSpectrogramImage(state *calls.ClassifyState, dataPath string, seg *utils.Segment) image.Image {
replacement in tui/classify.go at line 849

[6.247958]→[6.247958:248078](∅→∅)

func inlineImageCmd(state *tools.ClassifyState, protocol utils.ImageProtocol, gen uint64, currentGen *uint64) tea.Cmd {

[6.247958]

[6.248078]

func inlineImageCmd(state *calls.ClassifyState, protocol utils.ImageProtocol, gen uint64, currentGen *uint64) tea.Cmd {
file deletion: avianz_types.go (----------)

[6.248737]→[6.524:563](∅→∅),[6.563]→[6.1:1](∅→∅)

package tools

// Element types found in an AviaNZ .data file.
type (
	// AviaNZMeta is the metadata element of a .data file.
	// Reviewer is optional and omitted from JSON when nil.
	AviaNZMeta struct {
		Operator string  `json:"Operator"`
		Reviewer *string `json:"Reviewer,omitempty"`
		Duration float64 `json:"Duration"`
	}

	// AviaNZLabel is a single species label attached to a segment.
	AviaNZLabel struct {
		Species   string `json:"species"`
		Certainty int    `json:"certainty"`
		Filter    string `json:"filter"`
	}

	// AviaNZSegment is one detection segment, stored positionally as
	// [start, end, freq_low, freq_high, labels].
	AviaNZSegment [5]any
)
file deletion: parallel_aggregate.go (----------)

[6.248737]→[6.2367:2412](∅→∅),[6.2412]→[6.1:1](∅→∅)

package tools

import (
"fmt"
"os"
"path/filepath"
"sort"
"sync/atomic"
)

type (
	// parallelResult is what a birda/raven worker reports back for one
	// processed file; aggregateResults consumes it through these accessors.
	parallelResult interface {
		filePath() string
		getCalls() []ClusteredCall
		wasWritten() bool
		wasSkipped() bool
		getError() error
	}

	// aggregateStats is the running total built while draining the results
	// channel of a parallel fan-out/fan-in.
	aggregateStats struct {
		calls            []ClusteredCall
		speciesCount     map[string]int
		dataFilesWritten int
		dataFilesSkipped int
		filesProcessed   int
		filesDeleted     int
		firstErr         error // first error seen; later errors are dropped
	}
)

// aggregateResults drains results until the channel is closed and folds every
// value into a single aggregateStats.
//
// For each result it remembers the first error seen, counts written/skipped
// data files, appends the calls and tallies them per EbirdCode, optionally
// deletes the source file (see maybeDeleteFile), and, when progressHandler is
// non-nil, bumps processed and reports (current, total, base file name).
func aggregateResults(
	results <-chan parallelResult,
	total int,
	processed *atomic.Int32,
	deleteFiles bool,
	progressHandler func(int, int, string),
) aggregateStats {
	stats := aggregateStats{speciesCount: make(map[string]int)}

	for res := range results {
		err := res.getError()
		if err != nil && stats.firstErr == nil {
			stats.firstErr = err
		}

		if res.wasWritten() {
			stats.dataFilesWritten++
		}
		if res.wasSkipped() {
			stats.dataFilesSkipped++
		}

		found := res.getCalls()
		stats.calls = append(stats.calls, found...)
		for i := range found {
			stats.speciesCount[found[i].EbirdCode]++
		}

		stats.filesProcessed++
		stats.maybeDeleteFile(deleteFiles, res)

		if progressHandler == nil {
			continue
		}
		done := int(processed.Add(1))
		progressHandler(done, total, filepath.Base(res.filePath()))
	}

	return stats
}

// maybeDeleteFile removes the result's source file, but only when deletion
// was requested and the result reports that its data file was written.
// A successful removal increments filesDeleted; a failed one is recorded as
// firstErr unless an earlier error is already stored.
func (s *aggregateStats) maybeDeleteFile(deleteFiles bool, result parallelResult) {
	if !(deleteFiles && result.wasWritten()) {
		return
	}

	err := os.Remove(result.filePath())
	if err == nil {
		s.filesDeleted++
		return
	}
	if s.firstErr == nil {
		s.firstErr = fmt.Errorf("failed to delete %s: %w", result.filePath(), err)
	}
}

// sortCallsByFileAndTime orders calls in place: primarily by File, and by
// StartTime among calls that share the same File.
func sortCallsByFileAndTime(calls []ClusteredCall) {
	byFileThenStart := func(a, b int) bool {
		left, right := &calls[a], &calls[b]
		if left.File == right.File {
			return left.StartTime < right.StartTime
		}
		return left.File < right.File
	}
	sort.Slice(calls, byFileThenStart)
}
file deletion: calls_from_common.go (----------)

[6.248737]→[6.7706:7750](∅→∅),[6.7750]→[6.1:1](∅→∅)

package tools

import (
"fmt"
"os"
"path/filepath"
"sort"
"sync"
"sync/atomic"
)

type (
	// CallsFromSourceInput is the input shared by every calls-from-source tool.
	CallsFromSourceInput struct {
		Folder          string          `json:"folder"`
		File            string          `json:"file"`
		Delete          bool            `json:"delete"`
		ProgressHandler ProgressHandler `json:"-"` // optional progress callback, never serialized
	}

	// CallsFromSourceOutput is the output shared by every calls-from-source tool.
	CallsFromSourceOutput struct {
		Calls            []ClusteredCall `json:"calls"`
		TotalCalls       int             `json:"total_calls"`
		SpeciesCount     map[string]int  `json:"species_count"`
		DataFilesWritten int             `json:"data_files_written"`
		DataFilesSkipped int             `json:"data_files_skipped"`
		FilesProcessed   int             `json:"files_processed"`
		FilesDeleted     int             `json:"files_deleted"`
		Filter           string          `json:"filter"`
		Error            *string         `json:"error,omitempty"`
	}

	// CallSource is a provider of bird call data (Raven, BirdNET, etc.).
	CallSource interface {
		// Name is the display name, e.g. "Raven" or "BirdNET".
		Name() string
		// FindFiles lists the source files found in folder.
		FindFiles(folder string) ([]string, error)
		// ProcessFile handles one source file and reports its calls together
		// with whether a data file was written or skipped.
		ProcessFile(path string, cache *DirCache) (calls []ClusteredCall, written, skipped bool, err error)
	}
)

// callsFromSource is the common entry point behind every call source tool.
//
// The file list is input.File when set, otherwise whatever src.FindFiles
// returns for input.Folder; supplying neither is an error, as is an empty
// list. Fewer than 10 files are handled sequentially, larger batches go to
// the parallel implementation. On failure the message is stored in
// output.Error and also returned as the error.
func callsFromSource(src CallSource, input CallsFromSourceInput) (CallsFromSourceOutput, error) {
	output := CallsFromSourceOutput{Filter: src.Name()}

	// fail stores msg on the output and returns it as the error too.
	fail := func(msg string) (CallsFromSourceOutput, error) {
		output.Error = &msg
		return output, fmt.Errorf("%s", msg)
	}

	var files []string
	switch {
	case input.File != "":
		files = []string{input.File}
	case input.Folder != "":
		found, err := src.FindFiles(input.Folder)
		if err != nil {
			return fail(fmt.Sprintf("Failed to find %s files: %v", src.Name(), err))
		}
		files = found
	default:
		return fail("Either --folder or --file must be specified")
	}

	if len(files) == 0 {
		return fail(fmt.Sprintf("No %s files found", src.Name()))
	}

	// Small batches are not worth the goroutine overhead.
	if len(files) < 10 {
		return callsFromSourceSequential(src, input, files)
	}
	return callsFromSourceParallel(src, input, files)
}

// callsFromSourceSequential handles a small batch of source files one after
// another.
//
// One DirCache is kept per directory (the cache for input.Folder is built up
// front when a folder was given). The first processing or deletion failure
// aborts the run: the returned output then carries only Filter and Error.
// On success the calls are sorted by file and start time and all counters
// are copied onto the output.
func callsFromSourceSequential(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
	output := CallsFromSourceOutput{Filter: src.Name()}

	// abort stores msg on the output and returns it as the error too.
	abort := func(msg string) (CallsFromSourceOutput, error) {
		output.Error = &msg
		return output, fmt.Errorf("%s", msg)
	}

	caches := map[string]*DirCache{}
	if input.Folder != "" {
		caches[input.Folder] = NewDirCache(input.Folder)
	}

	var (
		collected  []ClusteredCall
		perSpecies = map[string]int{}
		nWritten   int
		nSkipped   int
		nProcessed int
		nDeleted   int
	)

	for _, path := range files {
		dir := filepath.Dir(path)
		cache := caches[dir]
		if cache == nil {
			cache = NewDirCache(dir)
			caches[dir] = cache
		}

		calls, written, skipped, err := src.ProcessFile(path, cache)
		if err != nil {
			return abort(fmt.Sprintf("Error processing %s: %v", path, err))
		}

		if written {
			nWritten++
		}
		if skipped {
			nSkipped++
		}

		collected = append(collected, calls...)
		for i := range calls {
			perSpecies[calls[i].EbirdCode]++
		}
		nProcessed++

		// Only remove sources whose data file was actually written.
		if input.Delete && written {
			if rmErr := os.Remove(path); rmErr != nil {
				return abort(fmt.Sprintf("Failed to delete %s: %v", path, rmErr))
			}
			nDeleted++
		}

		if input.ProgressHandler != nil {
			input.ProgressHandler(nProcessed, len(files), filepath.Base(path))
		}
	}

	// Order by file first, start time second.
	sort.Slice(collected, func(a, b int) bool {
		if collected[a].File == collected[b].File {
			return collected[a].StartTime < collected[b].StartTime
		}
		return collected[a].File < collected[b].File
	})

	output.Calls = collected
	output.TotalCalls = len(collected)
	output.SpeciesCount = perSpecies
	output.DataFilesWritten = nWritten
	output.DataFilesSkipped = nSkipped
	output.FilesProcessed = nProcessed
	output.FilesDeleted = nDeleted

	return output, nil
}

type (
	// sourceJob is one unit of work for a source worker: a file to process.
	sourceJob struct {
		filePath string
	}

	// sourceResult is the outcome of processing one source file. It
	// satisfies parallelResult through the accessors below.
	sourceResult struct {
		path    string
		calls   []ClusteredCall
		written bool
		skipped bool
		err     error
	}
)

// filePath reports the path of the processed source file.
func (r sourceResult) filePath() string {
	return r.path
}

// getCalls reports the calls produced for the file.
func (r sourceResult) getCalls() []ClusteredCall {
	return r.calls
}

// wasWritten reports the written flag recorded for the file.
func (r sourceResult) wasWritten() bool {
	return r.written
}

// wasSkipped reports the skipped flag recorded for the file.
func (r sourceResult) wasSkipped() bool {
	return r.skipped
}

// getError reports the processing error, if any.
func (r sourceResult) getError() error {
	return r.err
}

// callsFromSourceParallel fans the files out to DOT_DATA_WORKERS goroutines
// and aggregates what they report.
//
// Both channels are buffered to len(files), so queuing every job up front
// never blocks. The results channel is closed once all workers have exited,
// which is what ends aggregateResults. If any result carried an error, the
// first one is returned and the output holds only Filter and Error.
func callsFromSourceParallel(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
	output := CallsFromSourceOutput{Filter: src.Name()}
	total := len(files)

	// Directory caches shared by all workers; seeded with the input folder.
	var caches sync.Map
	if input.Folder != "" {
		caches.Store(input.Folder, NewDirCache(input.Folder))
	}

	jobs := make(chan sourceJob, total)
	results := make(chan parallelResult, total)

	var workers sync.WaitGroup
	for range DOT_DATA_WORKERS {
		workers.Add(1)
		go sourceWorker(src, &caches, jobs, results, &workers)
	}

	for _, path := range files {
		jobs <- sourceJob{filePath: path}
	}
	close(jobs)

	// Close results only after every worker is done sending.
	go func() {
		workers.Wait()
		close(results)
	}()

	var processed atomic.Int32
	stats := aggregateResults(results, total, &processed, input.Delete, input.ProgressHandler)

	if err := stats.firstErr; err != nil {
		msg := err.Error()
		output.Error = &msg
		return output, err
	}

	sortCallsByFileAndTime(stats.calls)

	output.Calls = stats.calls
	output.TotalCalls = len(stats.calls)
	output.SpeciesCount = stats.speciesCount
	output.DataFilesWritten = stats.dataFilesWritten
	output.DataFilesSkipped = stats.dataFilesSkipped
	output.FilesProcessed = stats.filesProcessed
	output.FilesDeleted = stats.filesDeleted

	return output, nil
}

// sourceWorker consumes jobs until the channel is closed, processes each file
// with src.ProcessFile and sends one sourceResult per job on results. It
// calls wg.Done when it returns.
//
// dirCaches maps a directory path to its *DirCache and is shared by all
// workers. A missing entry is created with LoadOrStore rather than a separate
// Load followed by Store: with the latter, two workers that miss on the same
// directory at the same time would each store their own cache, one
// overwriting the other, and end up using different DirCache instances for
// the same directory.
func sourceWorker(src CallSource, dirCaches *sync.Map, jobs <-chan sourceJob, results chan<- parallelResult, wg *sync.WaitGroup) {
	defer wg.Done()

	for job := range jobs {
		dir := filepath.Dir(job.filePath)

		cached, ok := dirCaches.Load(dir)
		if !ok {
			// Build a cache only on a miss; if another worker stored one in
			// the meantime, LoadOrStore hands back theirs and ours is dropped.
			cached, _ = dirCaches.LoadOrStore(dir, NewDirCache(dir))
		}
		cache := cached.(*DirCache)

		calls, written, skipped, err := src.ProcessFile(job.filePath, cache)
		results <- sourceResult{
			path:    job.filePath,
			calls:   calls,
			written: written,
			skipped: skipped,
			err:     err,
		}
	}
}
file deletion: isnight.go (----------)

[6.248737]→[6.303895:303929](∅→∅),[6.303929]→[6.299379:299379](∅→∅)

package tools

import (
"fmt"
"strings"
"time"

"github.com/sixdouglas/suncalc"

"skraak/utils"
)

type (
	// IsNightInput holds the parameters of the isnight tool: the WAV file to
	// inspect, the coordinates to evaluate it at, and an optional timezone.
	IsNightInput struct {
		FilePath string  `json:"file_path"`
		Lat      float64 `json:"lat"`
		Lng      float64 `json:"lng"`
		Timezone string  `json:"timezone,omitempty"`
	}

	// IsNightOutput is the result of the isnight tool. The four sun event
	// fields are omitted from JSON when empty.
	IsNightOutput struct {
		FilePath      string  `json:"file_path"`
		TimestampUTC  string  `json:"timestamp_utc"`
		SolarNight    bool    `json:"solar_night"`
		CivilNight    bool    `json:"civil_night"`
		DiurnalActive bool    `json:"diurnal_active"`
		MoonPhase     float64 `json:"moon_phase"`
		DurationSec   float64 `json:"duration_seconds"`
		TimestampSrc  string  `json:"timestamp_source"`
		MidpointUTC   string  `json:"midpoint_utc"`
		SunriseUTC    string  `json:"sunrise_utc,omitempty"`
		SunsetUTC     string  `json:"sunset_utc,omitempty"`
		DawnUTC       string  `json:"dawn_utc,omitempty"`
		DuskUTC       string  `json:"dusk_utc,omitempty"`
	}
)

// IsNight determines if a WAV file was recorded at night based on its
// metadata timestamp and the given GPS coordinates.
//
// Timestamp resolution order:
//  1. AudioMoth comment (timezone embedded)
//  2. Filename timestamp + timezone offset (requires --timezone)
//  3. File modification time (system local time)
//
// All astronomical values are evaluated at the recording midpoint.
// DiurnalActive is set only when both dawn and sunset are available for that
// midpoint, and is true when the midpoint lies within [dawn, sunset].
//
// An error is returned when the WAV header cannot be parsed or no timestamp
// can be resolved. On success the fully populated output is returned with a
// nil error; the body as previously written ended without a return statement.
func IsNight(input IsNightInput) (IsNightOutput, error) {
	var output IsNightOutput

	// Step 1: Parse WAV header
	metadata, err := utils.ParseWAVHeader(input.FilePath)
	if err != nil {
		return output, fmt.Errorf("WAV header parsing failed: %w", err)
	}

	output.DurationSec = metadata.Duration

	// Step 2: Resolve timestamp (use file mod time as fallback)
	tsResult, err := utils.ResolveTimestamp(metadata, input.FilePath, input.Timezone, true, nil)
	if err != nil {
		return output, fmt.Errorf("cannot determine recording timestamp: %w", err)
	}
	recordedUTC := tsResult.Timestamp.UTC()

	// Label where the timestamp came from.
	tsSource := "file_mod_time"
	switch {
	case tsResult.IsAudioMoth:
		tsSource = "audiomoth_comment"
	case utils.HasTimestampFilename(input.FilePath):
		tsSource = "filename"
	}

	// Step 3: Calculate astronomical data using recording midpoint
	astroData := utils.CalculateAstronomicalData(
		recordedUTC,
		metadata.Duration,
		input.Lat,
		input.Lng,
	)

	// Step 4: Get sun event times for informational output
	midpoint := utils.CalculateMidpointTime(recordedUTC, metadata.Duration)
	sunTimes := suncalc.GetTimes(midpoint, input.Lat, input.Lng)

	output.FilePath = input.FilePath
	output.TimestampUTC = recordedUTC.Format(time.RFC3339)
	output.SolarNight = astroData.SolarNight
	output.CivilNight = astroData.CivilNight
	output.MoonPhase = astroData.MoonPhase
	output.TimestampSrc = tsSource
	output.MidpointUTC = midpoint.Format(time.RFC3339)

	// Diurnal: midpoint is between dawn and sunset (inclusive).
	if dawn, ok := sunTimes[suncalc.Dawn]; ok && !dawn.Value.IsZero() {
		if sunset, ok := sunTimes[suncalc.Sunset]; ok && !sunset.Value.IsZero() {
			output.DiurnalActive = !midpoint.Before(dawn.Value) && !midpoint.After(sunset.Value)
		}
	}

	output.SunriseUTC = sunTimeUTC(sunTimes, suncalc.Sunrise)
	output.SunsetUTC = sunTimeUTC(sunTimes, suncalc.Sunset)
	output.DawnUTC = sunTimeUTC(sunTimes, suncalc.Dawn)
	output.DuskUTC = sunTimeUTC(sunTimes, suncalc.Dusk)

	return output, nil
}

// String renders the result as one "label: value" line per field. The four
// sun event lines (sunrise, sunset, dawn, dusk) appear only when their value
// is non-empty.
func (o IsNightOutput) String() string {
	var out strings.Builder

	fmt.Fprintf(&out, "File: %s\n", o.FilePath)
	fmt.Fprintf(&out, "Timestamp (UTC): %s\n", o.TimestampUTC)
	fmt.Fprintf(&out, "Midpoint (UTC): %s\n", o.MidpointUTC)
	fmt.Fprintf(&out, "Duration: %.1f seconds\n", o.DurationSec)
	fmt.Fprintf(&out, "Source: %s\n", o.TimestampSrc)
	fmt.Fprintf(&out, "Solar night: %v\n", o.SolarNight)
	fmt.Fprintf(&out, "Civil night: %v\n", o.CivilNight)
	fmt.Fprintf(&out, "Moon phase: %.2f\n", o.MoonPhase)

	sunEvents := []struct {
		label string
		value string
	}{
		{"Sunrise (UTC): ", o.SunriseUTC},
		{"Sunset (UTC): ", o.SunsetUTC},
		{"Dawn (UTC): ", o.DawnUTC},
		{"Dusk (UTC): ", o.DuskUTC},
	}
	for _, ev := range sunEvents {
		if ev.value == "" {
			continue
		}
		out.WriteString(ev.label)
		out.WriteString(ev.value)
		out.WriteString("\n")
	}

	return out.String()
}
// populateSunTimes fills in sun event times and diurnal status from suncalc results.
func populateSunTimes(output *IsNightOutput, sunTimes map[suncalc.DayTimeName]suncalc.DayTime, midpoint time.Time) {
// Diurnal: midpoint is between dawn and sunset
// sunTimeUTC formats the named suncalc event as an RFC3339 string in UTC.
// It yields "" when the event is missing from sunTimes or its time is zero.
func sunTimeUTC(sunTimes map[suncalc.DayTimeName]suncalc.DayTime, name suncalc.DayTimeName) string {
	entry, found := sunTimes[name]
	if !found || entry.Value.IsZero() {
		return ""
	}
	return entry.Value.UTC().Format(time.RFC3339)
}

populateSunTimes(&output, sunTimes, midpoint)

return output, nil
}
file deletion: import_unstructured.go (----------)

[6.248737]→[6.315559:315605](∅→∅),[6.315605]→[6.307677:307677](∅→∅)

package tools

import (
"context"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"time"

"skraak/db"
"skraak/utils"
)

type (
	// ImportUnstructuredInput holds the parameters for importing files into
	// an unstructured dataset. Recursive is optional (nil when not supplied).
	ImportUnstructuredInput struct {
		DatasetID  string `json:"dataset_id"`
		FolderPath string `json:"folder_path"`
		Recursive  *bool  `json:"recursive,omitempty"`
	}

	// ImportUnstructuredOutput summarizes an unstructured import run.
	ImportUnstructuredOutput struct {
		TotalFiles     int                     `json:"total_files"`
		ImportedFiles  int                     `json:"imported_files"`
		SkippedFiles   int                     `json:"skipped_files"` // duplicates
		FailedFiles    int                     `json:"failed_files"`
		TotalDuration  float64                 `json:"total_duration_seconds"`
		ProcessingTime string                  `json:"processing_time"`
		Errors         []utils.FileImportError `json:"errors,omitempty"`
	}
)

// ImportUnstructured imports WAV files into an unstructured dataset
// Files are stored with minimal metadata: hash, duration, sample_rate, file_mod_time as timestamp
// No location/cluster hierarchy, no astronomical data, no AudioMoth parsing
//
// Scanning is recursive unless input.Recursive is set to false. Scan errors
// and per-file processing errors are collected in output.Errors; a file that
// fails processing is counted in FailedFiles and does not stop the loop.
// Duplicates are counted in SkippedFiles and do not add to TotalDuration.
//
// NOTE(review): as shown, this body is not self-consistent — `tx` and the
// final `err` are never declared, and the `return nil` / `})` after the loop
// close a function literal whose opening line is absent. Presumably the loop
// is meant to run inside a write-transaction callback; confirm against the
// complete file before relying on this text.
func ImportUnstructured(
ctx context.Context,
input ImportUnstructuredInput,
) (ImportUnstructuredOutput, error) {
startTime := time.Now()
var output ImportUnstructuredOutput

// Default recursive to true
recursive := true
if input.Recursive != nil {
recursive = *input.Recursive
}

// Validate input
if err := validateUnstructuredInput(input); err != nil {
return output, fmt.Errorf("validation failed: %w", err)
}

// Scan for WAV files (no DB needed)
files, scanErrors := scanWavFiles(input.FolderPath, recursive)
output.Errors = append(output.Errors, scanErrors...)
output.TotalFiles = len(files)

// Nothing to import: report the elapsed time and stop before touching the DB.
if len(files) == 0 {
output.ProcessingTime = time.Since(startTime).String()
return output, nil
}

// Process each file
for _, filePath := range files {
fileResult, procErr := processUnstructuredFile(tx, filePath, input.DatasetID)

// A failed file is recorded and skipped; the remaining files still run.
if procErr != nil {
output.FailedFiles++
output.Errors = append(output.Errors, utils.FileImportError{
FileName: filepath.Base(filePath),
Error: procErr.Error(),
Stage: utils.StageProcess,
})
continue
}

if fileResult.Skipped {
output.SkippedFiles++
} else {
output.ImportedFiles++
output.TotalDuration += fileResult.Duration
}
}
return nil
})
if err != nil {
return output, err
}

output.ProcessingTime = time.Since(startTime).String()
return output, nil
}

// unstructuredFileResult is what processing one file for an unstructured
// import produces.
type unstructuredFileResult struct {
	// Skipped is true when the file was a duplicate.
	Skipped bool
	// Duration is the file's duration in seconds.
	Duration float64
}

// processUnstructuredFile imports one WAV file inside the given transaction.
//
// It parses the WAV header, hashes the file and checks the hash for a
// duplicate. A duplicate is reported as Skipped (with its duration) and
// nothing is written. Otherwise a new file row is inserted, using the file
// modification time as timestamp and NULL for location, cluster and the
// astronomical columns, followed by the file_dataset link to datasetID.
// Any failing step returns a nil result and a wrapped error.
func processUnstructuredFile(tx *db.LoggedTx, filePath, datasetID string) (*unstructuredFileResult, error) {
	header, err := utils.ParseWAVHeader(filePath)
	if err != nil {
		return nil, fmt.Errorf("WAV header parsing failed: %w", err)
	}

	digest, err := utils.ComputeXXH64(filePath)
	if err != nil {
		return nil, fmt.Errorf("hash calculation failed: %w", err)
	}

	// A known hash means the file is already stored: skip it entirely and
	// do not link it to this dataset.
	_, alreadyStored, err := utils.CheckDuplicateHash(tx, digest)
	if err != nil {
		return nil, fmt.Errorf("duplicate check failed: %w", err)
	}
	if alreadyStored {
		return &unstructuredFileResult{Skipped: true, Duration: header.Duration}, nil
	}

	newID, err := utils.GenerateLongID()
	if err != nil {
		return nil, fmt.Errorf("ID generation failed: %w", err)
	}

	// The modification time is stored as-is (no timezone conversion).
	modTime := header.FileModTime

	if _, err := tx.Exec(`
INSERT INTO file (
id, file_name, xxh64_hash, location_id, cluster_id,
timestamp_local, duration, sample_rate,
maybe_solar_night, maybe_civil_night, moon_phase,
active
) VALUES (?, ?, ?, NULL, NULL, ?, ?, ?, NULL, NULL, NULL, TRUE)
`,
		newID,
		filepath.Base(filePath),
		digest,
		modTime,
		header.Duration,
		header.SampleRate,
	); err != nil {
		return nil, fmt.Errorf("file insert failed: %w", err)
	}

	if _, err := tx.Exec(
		"INSERT INTO file_dataset (file_id, dataset_id) VALUES (?, ?)",
		newID, datasetID,
	); err != nil {
		return nil, fmt.Errorf("file_dataset insert failed: %w", err)
	}

	return &unstructuredFileResult{Duration: header.Duration}, nil
}

// validateUnstructuredInput validates the input parameters
//
// Checks run in order: dataset ID format, folder exists and is a directory,
// dataset exists and is active, dataset is of the 'unstructured' type. The
// first failing check's error is returned.
//
// NOTE(review): as shown, `database` is never declared and the trailing `})`
// closes a function literal whose opening line is absent. Presumably the two
// dataset checks run inside a read-only database callback; confirm against
// the complete file.
func validateUnstructuredInput(input ImportUnstructuredInput) error {
// Validate dataset ID format
if err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {
return err
}

// Verify folder exists
info, err := os.Stat(input.FolderPath)
if err != nil {
return fmt.Errorf("folder not accessible: %w", err)
}
if !info.IsDir() {
return fmt.Errorf("path is not a directory: %s", input.FolderPath)
}

// Verify dataset exists and is active
if _, err := db.DatasetExistsAndActive(database, input.DatasetID); err != nil {
return err
}

// Verify dataset is 'unstructured' type
if err := db.ValidateDatasetTypeUnstructured(database, input.DatasetID); err != nil {
return err
}

return nil
})
}

// scanWavFiles collects the paths of all files under folderPath whose name
// ends in ".wav" (case-insensitive).
//
// With recursive set, the whole tree is walked; an entry that cannot be read
// is recorded as a scan-stage error and the walk continues. Without it, only
// the direct children of folderPath are listed; if the folder itself cannot
// be read, no files are returned alongside the recorded error.
//
// It returns the matching paths and every scan error encountered.
func scanWavFiles(folderPath string, recursive bool) ([]string, []utils.FileImportError) {
	var (
		files    []string
		scanErrs []utils.FileImportError // not named "errors": that would shadow the stdlib package
	)

	isWav := func(name string) bool {
		return strings.HasSuffix(strings.ToLower(name), ".wav")
	}
	scanFailure := func(name string, err error) utils.FileImportError {
		return utils.FileImportError{
			FileName: name,
			Error:    err.Error(),
			Stage:    utils.StageScan,
		}
	}

	if !recursive {
		// Non-recursive: only scan top-level
		entries, err := os.ReadDir(folderPath)
		if err != nil {
			return nil, append(scanErrs, scanFailure(folderPath, err))
		}
		for _, entry := range entries {
			if !entry.IsDir() && isWav(entry.Name()) {
				files = append(files, filepath.Join(folderPath, entry.Name()))
			}
		}
		return files, scanErrs
	}

	// The callback runs only for recursive scans, so it never needs to prune
	// subdirectories; it just records unreadable entries and keeps walking.
	walkErr := filepath.WalkDir(folderPath, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			scanErrs = append(scanErrs, scanFailure(path, err))
			return nil
		}
		if !d.IsDir() && isWav(d.Name()) {
			files = append(files, path)
		}
		return nil
	})
	if walkErr != nil {
		scanErrs = append(scanErrs, scanFailure(folderPath, walkErr))
	}

	return files, scanErrs
}
return db.WithReadDB(resolveDBPath(input.DBPath), func(database *sql.DB) error {
err := db.WithWriteTx(ctx, resolveDBPath(input.DBPath), "import_unstructured", func(database *sql.DB, tx *db.LoggedTx) error {
DBPath string `json:"db_path"`
"database/sql"
file deletion: import_segments_test.go (----------)

[6.248737]→[6.318117:318164](∅→∅),[6.318164]→[6.315607:315607](∅→∅)

package tools

import (
"testing"

"skraak/utils"
)

// TestValidateSegmentImportInput checks that malformed dataset, location and
// cluster IDs are rejected.
//
// validateSegmentImportInput stats the folder and the mapping path before it
// looks at any ID, so each case supplies paths that exist. Without them every
// case would already fail on the folder check and the ID validation under
// test would never run.
func TestValidateSegmentImportInput(t *testing.T) {
	// The validator only requires the folder to be a directory and the
	// mapping path to exist, so one temp directory serves for both.
	existing := t.TempDir()

	cases := []struct {
		name    string
		input   ImportSegmentsInput
		failMsg string
	}{
		{
			name:    "invalid dataset ID - too short",
			input:   ImportSegmentsInput{DatasetID: "abc"},
			failMsg: "expected error for short dataset ID",
		},
		{
			name:    "invalid dataset ID - too long",
			input:   ImportSegmentsInput{DatasetID: "abc123def456ghi789"},
			failMsg: "expected error for long dataset ID",
		},
		{
			name:    "invalid dataset ID - invalid characters",
			input:   ImportSegmentsInput{DatasetID: "abc123!!!456"},
			failMsg: "expected error for invalid characters in dataset ID",
		},
		{
			name: "invalid location ID",
			input: ImportSegmentsInput{
				DatasetID:  "abc123def456",
				LocationID: "invalid",
			},
			failMsg: "expected error for invalid location ID",
		},
		{
			name: "invalid cluster ID",
			input: ImportSegmentsInput{
				DatasetID:  "abc123def456",
				LocationID: "xyz789uvw012",
				ClusterID:  "invalid",
			},
			failMsg: "expected error for invalid cluster ID",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			input := tc.input
			input.Folder = existing
			input.Mapping = existing

			if err := validateSegmentImportInput(input); err == nil {
				t.Fatal(tc.failMsg)
			}
		})
	}
}

func TestCountTotalSegments(t *testing.T) {
t.Run("empty", func(t *testing.T) {
count := countTotalSegments(map[string]scannedDataFile{})
if count != 0 {
t.Errorf("expected 0, got %d", count)
}
})

t.Run("single file - no segments", func(t *testing.T) {
files := map[string]scannedDataFile{
"file1": {Segments: []*utils.Segment{}},
}
count := countTotalSegments(files)
if count != 0 {
t.Errorf("expected 0, got %d", count)
}
})

t.Run("single file - multiple segments", func(t *testing.T) {
files := map[string]scannedDataFile{
"file1": {Segments: []*utils.Segment{{}, {}, {}}},
}
count := countTotalSegments(files)
if count != 3 {
t.Errorf("expected 3, got %d", count)
}
})

t.Run("multiple files", func(t *testing.T) {
files := map[string]scannedDataFile{
"file1": {Segments: []*utils.Segment{{}, {}}},
"file2": {Segments: []*utils.Segment{{}}},
"file3": {Segments: []*utils.Segment{{}, {}, {}, {}}},
}
count := countTotalSegments(files)
if count != 7 {
t.Errorf("expected 7, got %d", count)
}
})
}
file deletion: import_segments.go (----------)

[6.248737]→[6.345207:345249](∅→∅),[6.345249]→[6.318166:318166](∅→∅)

package tools

import (
"context"
"database/sql"
"fmt"
"os"
"path/filepath"
"strings"
"time"

"skraak/db"
"skraak/utils"
)

type (
	// ImportSegmentsInput holds the parameters of the import_segments tool.
	// ProgressHandler is optional and carries no JSON tag.
	ImportSegmentsInput struct {
		Folder          string `json:"folder"`
		Mapping         string `json:"mapping"`
		DatasetID       string `json:"dataset_id"`
		LocationID      string `json:"location_id"`
		ClusterID       string `json:"cluster_id"`
		ProgressHandler func(processed, total int, message string)
	}

	// ImportSegmentsOutput is the result of the import_segments tool.
	ImportSegmentsOutput struct {
		Summary  ImportSegmentsSummary `json:"summary"`
		Segments []SegmentImport       `json:"segments"`
		Errors   []ImportSegmentError  `json:"errors,omitempty"`
	}

	// ImportSegmentsSummary holds the counters reported for one import run.
	ImportSegmentsSummary struct {
		DataFilesFound     int   `json:"data_files_found"`
		DataFilesProcessed int   `json:"data_files_processed"`
		TotalSegments      int   `json:"total_segments"`
		ImportedSegments   int   `json:"imported_segments"`
		ImportedLabels     int   `json:"imported_labels"`
		ImportedSubtypes   int   `json:"imported_subtypes"`
		ProcessingTimeMs   int64 `json:"processing_time_ms"`
	}

	// SegmentImport describes one imported segment in the output.
	SegmentImport struct {
		SegmentID string        `json:"segment_id"`
		FileName  string        `json:"file_name"`
		StartTime float64       `json:"start_time"`
		EndTime   float64       `json:"end_time"`
		FreqLow   float64       `json:"freq_low"`
		FreqHigh  float64       `json:"freq_high"`
		Labels    []LabelImport `json:"labels"`
	}

	// LabelImport describes one imported label in the output.
	LabelImport struct {
		LabelID   string `json:"label_id"`
		Species   string `json:"species"`
		CallType  string `json:"calltype,omitempty"`
		Filter    string `json:"filter"`
		Certainty int    `json:"certainty"`
		Comment   string `json:"comment,omitempty"`
	}

	// ImportSegmentError records an error hit during segment import, with
	// the stage it occurred in and, optionally, the file concerned.
	ImportSegmentError struct {
		File    string            `json:"file,omitempty"`
		Stage   utils.ImportStage `json:"stage"`
		Message string            `json:"message"`
	}

	// scannedDataFile is the parsed content of one .data file together with
	// the details of the WAV file it belongs to.
	scannedDataFile struct {
		DataPath string
		WavPath  string
		WavHash  string
		FileID   string
		Duration float64
		Segments []*utils.Segment
	}
)

// ImportSegments imports segments from AviaNZ .data files into the database
//
// The phases run in order: input validation, loading the mapping file and
// finding the .data files, validating them against the database, the import
// itself, and finally writing the generated IDs back to the .data files.
// Errors from validation, import and write-back are accumulated in
// output.Errors. An invalid input, an unreadable mapping file, a failed file
// search or a folder without .data files returns an error immediately.
//
// NOTE(review): as shown, `database` is never assigned and the "failed to
// open database" check re-tests the err from FindDataFiles, which was already
// handled above — the statement that opens the database appears to be missing
// from this view; confirm against the complete file.
func ImportSegments(ctx context.Context, input ImportSegmentsInput) (ImportSegmentsOutput, error) {
startTime := time.Now()
var output ImportSegmentsOutput
// Start from empty, non-nil slices for Segments and Errors.
output.Segments = make([]SegmentImport, 0)
output.Errors = make([]ImportSegmentError, 0)

// Phase A: Input Validation
if err := validateSegmentImportInput(input); err != nil {
return output, err
}

// Load mapping file
mapping, err := utils.LoadMappingFile(input.Mapping)
if err != nil {
return output, fmt.Errorf("failed to load mapping file: %w", err)
}

// Find .data files
dataFiles, err := utils.FindDataFiles(input.Folder)
if err != nil {
return output, fmt.Errorf("failed to find .data files: %w", err)
}
output.Summary.DataFilesFound = len(dataFiles)

if len(dataFiles) == 0 {
return output, fmt.Errorf("no .data files found in folder: %s", input.Folder)
}

// Phase B+C: Parse data files and validate against DB
if err != nil {
return output, fmt.Errorf("failed to open database: %w", err)
}
defer database.Close()

val, valErrors, err := validateAndPrepareSegments(database, input, mapping, dataFiles)
output.Errors = append(output.Errors, valErrors...)
if err != nil {
return output, err
}
// Nothing passed validation: return the collected errors without importing.
if val == nil || len(val.fileIDMap) == 0 {
output.Summary.ProcessingTimeMs = time.Since(startTime).Milliseconds()
return output, nil
}

// Phase D: Transactional Import
importedSegments, importedLabels, importedSubtypes, fileUpdates, importErrors := importSegmentsIntoDB(
ctx, database, val.fileIDMap, val.scannedFiles, mapping, val.filterIDMap, val.speciesIDMap, val.calltypeIDMap, input.DatasetID, input.ProgressHandler,
)
output.Errors = append(output.Errors, importErrors...)
output.Segments = append(output.Segments, importedSegments...)

// Phase E: Write IDs back to .data files
if len(fileUpdates) > 0 {
writeErrors := writeIDsToDataFiles(fileUpdates)
output.Errors = append(output.Errors, writeErrors...)
}

output.Summary.DataFilesProcessed = len(val.fileIDMap)
output.Summary.TotalSegments = countTotalSegments(val.fileIDMap)
output.Summary.ImportedSegments = len(importedSegments)
output.Summary.ImportedLabels = importedLabels
output.Summary.ImportedSubtypes = importedSubtypes
output.Summary.ProcessingTimeMs = time.Since(startTime).Milliseconds()

return output, nil
}

// validateSegmentImportInput performs the checks that need no database
// access: the folder must exist and be a directory, the mapping file must
// exist, and the dataset, location and cluster IDs must pass
// utils.ValidateShortID. The first failing check is returned.
func validateSegmentImportInput(input ImportSegmentsInput) error {
	// Folder: any stat failure is reported as "does not exist".
	folderInfo, folderErr := os.Stat(input.Folder)
	if folderErr != nil {
		return fmt.Errorf("folder does not exist: %s", input.Folder)
	}
	if !folderInfo.IsDir() {
		return fmt.Errorf("path is not a folder: %s", input.Folder)
	}

	// Mapping file: only its presence is checked here.
	_, mappingErr := os.Stat(input.Mapping)
	if mappingErr != nil {
		return fmt.Errorf("mapping file does not exist: %s", input.Mapping)
	}

	// ID formats, checked in dataset -> location -> cluster order.
	if idErr := utils.ValidateShortID(input.DatasetID, "dataset_id"); idErr != nil {
		return idErr
	}
	if idErr := utils.ValidateShortID(input.LocationID, "location_id"); idErr != nil {
		return idErr
	}
	if idErr := utils.ValidateShortID(input.ClusterID, "cluster_id"); idErr != nil {
		return idErr
	}
	return nil
}

// validateSegmentHierarchy runs the three database checks that tie an import
// to its target: the dataset check, the location-in-dataset check and the
// cluster-in-location check, in that order. The first error stops the chain
// and is returned unchanged.
func validateSegmentHierarchy(dbConn *sql.DB, datasetID, locationID, clusterID string) error {
	// Dataset must be acceptable for import.
	datasetErr := db.ValidateDatasetTypeForImport(dbConn, datasetID)
	if datasetErr != nil {
		return datasetErr
	}

	// Location must belong to that dataset.
	locationErr := db.ValidateLocationBelongsToDataset(dbConn, locationID, datasetID)
	if locationErr != nil {
		return locationErr
	}

	// Cluster must belong to that location.
	clusterErr := db.ClusterBelongsToLocation(dbConn, clusterID, locationID)
	if clusterErr != nil {
		return clusterErr
	}

	return nil
}

// scanAllDataFiles parses every path in dataFiles and gathers the distinct
// filter names, species names and per-species calltypes found in their labels.
//
// A .data file is skipped (and an error recorded) when the WAV file named by
// stripping the ".data" suffix cannot be stat'ed or when parsing fails. The
// folder parameter is accepted but not used by this function.
//
// Returns, in order: the successfully scanned files, the recorded errors, the
// filter set, the species set, and the species -> calltype set.
func scanAllDataFiles(dataFiles []string, folder string) (
	[]scannedDataFile,
	[]ImportSegmentError,
	map[string]bool,
	map[string]bool,
	map[string]map[string]bool,
) {
	var (
		parsed   []scannedDataFile
		problems []ImportSegmentError
	)
	filters := make(map[string]bool)
	species := make(map[string]bool)
	calltypes := make(map[string]map[string]bool) // species -> calltype -> true

	for _, dataPath := range dataFiles {
		// The WAV file sits next to the .data file, minus the suffix.
		wavPath := strings.TrimSuffix(dataPath, ".data")
		if _, statErr := os.Stat(wavPath); statErr != nil {
			problems = append(problems, ImportSegmentError{
				File:    filepath.Base(dataPath),
				Stage:   utils.StageValidation,
				Message: fmt.Sprintf("corresponding WAV file not found: %s", filepath.Base(wavPath)),
			})
			continue
		}

		df, parseErr := utils.ParseDataFile(dataPath)
		if parseErr != nil {
			problems = append(problems, ImportSegmentError{
				File:    filepath.Base(dataPath),
				Stage:   utils.StageValidation,
				Message: fmt.Sprintf("failed to parse .data file: %v", parseErr),
			})
			continue
		}

		// Record every filter/species/calltype mentioned by this file.
		for _, seg := range df.Segments {
			for _, label := range seg.Labels {
				filters[label.Filter] = true
				species[label.Species] = true
				if label.CallType == "" {
					continue
				}
				perSpecies := calltypes[label.Species]
				if perSpecies == nil {
					perSpecies = make(map[string]bool)
					calltypes[label.Species] = perSpecies
				}
				perSpecies[label.CallType] = true
			}
		}

		parsed = append(parsed, scannedDataFile{
			DataPath: dataPath,
			WavPath:  wavPath,
			Duration: df.Meta.Duration,
			Segments: df.Segments,
		})
	}

	return parsed, problems, filters, species, calltypes
}

// validateFiltersExist checks that every name in filterNames matches an active
// row in the filter table and returns a name -> id map.
//
// An empty filterNames yields an empty map and no query. An error is returned
// when the query fails, when a row cannot be read, or when one or more names
// have no active filter; in the last case the message lists the missing names.
func validateFiltersExist(dbConn *sql.DB, filterNames map[string]bool) (map[string]string, error) {
	filterIDMap := make(map[string]string, len(filterNames))

	if len(filterNames) == 0 {
		return filterIDMap, nil
	}

	// One bound argument per requested name.
	args := make([]any, 0, len(filterNames))
	for name := range filterNames {
		args = append(args, name)
	}

	query := `SELECT id, name FROM filter WHERE name IN (` + db.Placeholders(len(args)) + `) AND active = true`
	rows, err := dbConn.Query(query, args...)
	if err != nil {
		return nil, fmt.Errorf("failed to query filters: %w", err)
	}
	defer rows.Close()

	for rows.Next() {
		var id, name string
		// A scan failure must not be mistaken for "filter missing".
		if err := rows.Scan(&id, &name); err != nil {
			return nil, fmt.Errorf("failed to scan filter row: %w", err)
		}
		filterIDMap[name] = id
	}
	// Next() returning false can also mean the iteration broke off early.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("failed to read filter rows: %w", err)
	}

	// Check for missing filters
	var missing []string
	for name := range filterNames {
		if _, exists := filterIDMap[name]; !exists {
			missing = append(missing, name)
		}
	}

	if len(missing) > 0 {
		return nil, fmt.Errorf("filters not found in database: [%s]", strings.Join(missing, ", "))
	}

	return filterIDMap, nil
}

// loadSpeciesCalltypeIDs resolves database IDs for the species and calltypes
// referenced by the scanned .data files.
//
// Names from the .data files are first translated through mapping; names the
// mapping does not cover are skipped. The first result maps a database species
// label to its species ID. The second maps database species label ->
// database calltype label -> call_type ID. Species or calltypes with no active
// row are simply absent from the maps. An error is returned when a query
// fails or a row cannot be read.
func loadSpeciesCalltypeIDs(
	dbConn *sql.DB,
	mapping utils.MappingFile,
	uniqueSpecies map[string]bool,
	uniqueCalltypes map[string]map[string]bool,
) (map[string]string, map[string]map[string]string, error) {
	calltypeIDMap := make(map[string]map[string]string) // dbSpecies -> dbCalltype -> calltype_id

	// Collect the distinct DB species labels the mapping yields.
	seen := make(map[string]bool, len(uniqueSpecies))
	labels := make([]any, 0, len(uniqueSpecies))
	for dataSpecies := range uniqueSpecies {
		dbSpecies, ok := mapping.GetDBSpecies(dataSpecies)
		if !ok || seen[dbSpecies] {
			continue
		}
		seen[dbSpecies] = true
		labels = append(labels, dbSpecies)
	}

	speciesIDMap, err := querySpeciesIDs(dbConn, labels)
	if err != nil {
		return nil, nil, err
	}

	// Load calltype IDs
	for dataSpecies, ctSet := range uniqueCalltypes {
		dbSpecies, ok := mapping.GetDBSpecies(dataSpecies)
		if !ok {
			continue
		}

		if calltypeIDMap[dbSpecies] == nil {
			calltypeIDMap[dbSpecies] = make(map[string]string)
		}

		for dataCalltype := range ctSet {
			dbCalltype := mapping.GetDBCalltype(dataSpecies, dataCalltype)

			var calltypeID string
			err := dbConn.QueryRow(`
SELECT ct.id
FROM call_type ct
JOIN species s ON ct.species_id = s.id
WHERE s.label = ? AND ct.label = ? AND ct.active = true
`, dbSpecies, dbCalltype).Scan(&calltypeID)

			switch {
			case err == nil:
				calltypeIDMap[dbSpecies][dbCalltype] = calltypeID
			case err == sql.ErrNoRows:
				// No such calltype: leave it out of the map.
			default:
				// A real query failure must not look like a missing calltype.
				return nil, nil, fmt.Errorf("failed to query calltype %s/%s: %w", dbSpecies, dbCalltype, err)
			}
		}
	}

	return speciesIDMap, calltypeIDMap, nil
}

// querySpeciesIDs returns label -> id for the active species rows whose label
// is in labels. It issues no query when labels is empty. Keeping this in its
// own function closes the result set before the caller runs further queries.
func querySpeciesIDs(dbConn *sql.DB, labels []any) (map[string]string, error) {
	speciesIDMap := make(map[string]string, len(labels))
	if len(labels) == 0 {
		return speciesIDMap, nil
	}

	query := `SELECT id, label FROM species WHERE label IN (` + db.Placeholders(len(labels)) + `) AND active = true`
	rows, err := dbConn.Query(query, labels...)
	if err != nil {
		return nil, fmt.Errorf("failed to query species: %w", err)
	}
	defer rows.Close()

	for rows.Next() {
		var id, label string
		if err := rows.Scan(&id, &label); err != nil {
			return nil, fmt.Errorf("failed to scan species row: %w", err)
		}
		speciesIDMap[label] = id
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("failed to read species rows: %w", err)
	}
	return speciesIDMap, nil
}

// validateAndMapFiles decides, for each scanned file, whether it may be
// imported. A file is accepted when its WAV hash matches an active file row in
// the cluster, that row is linked to the dataset through file_dataset, and no
// active label already exists on any of its segments.
//
// Accepted files are returned keyed by file ID, with WavHash, FileID and
// Duration (taken from the file row) filled in. Every rejected file contributes
// one ImportSegmentError naming the WAV file.
func validateAndMapFiles(
	dbConn *sql.DB,
	scannedFiles []scannedDataFile,
	clusterID string,
	datasetID string,
) (map[string]scannedDataFile, []ImportSegmentError) {
	accepted := make(map[string]scannedDataFile)
	var problems []ImportSegmentError

	for _, candidate := range scannedFiles {
		wavName := filepath.Base(candidate.WavPath)
		// reject records one error for the current file.
		reject := func(stage utils.ImportStage, message string) {
			problems = append(problems, ImportSegmentError{File: wavName, Stage: stage, Message: message})
		}

		// Step 1: hash the WAV file.
		wavHash, hashErr := utils.ComputeXXH64(candidate.WavPath)
		if hashErr != nil {
			reject(utils.StageHash, fmt.Sprintf("failed to compute hash: %v", hashErr))
			continue
		}
		candidate.WavHash = wavHash

		// Step 2: look the hash up within the cluster.
		var (
			matchedID       string
			matchedDuration float64
		)
		lookupErr := dbConn.QueryRow(`
SELECT id, duration FROM file WHERE xxh64_hash = ? AND cluster_id = ? AND active = true
`, wavHash, clusterID).Scan(&matchedID, &matchedDuration)
		if lookupErr == sql.ErrNoRows {
			reject(utils.StageValidation, fmt.Sprintf("file hash not found in database for cluster (hash: %s)", wavHash))
			continue
		}
		if lookupErr != nil {
			reject(utils.StageValidation, fmt.Sprintf("failed to query file: %v", lookupErr))
			continue
		}
		candidate.FileID = matchedID
		candidate.Duration = matchedDuration

		// Step 3: the file must be linked to the dataset (file_dataset junction).
		var linked bool
		linkErr := dbConn.QueryRow(`
SELECT EXISTS(SELECT 1 FROM file_dataset WHERE file_id = ? AND dataset_id = ?)
`, matchedID, datasetID).Scan(&linked)
		if linkErr != nil {
			reject(utils.StageValidation, fmt.Sprintf("failed to verify file-dataset link: %v", linkErr))
			continue
		}
		if !linked {
			reject(utils.StageValidation, fmt.Sprintf("file exists in cluster but is not linked to dataset %s", datasetID))
			continue
		}

		// Step 4: only files without active labels may be imported.
		var existingLabels int
		countErr := dbConn.QueryRow(`
SELECT COUNT(*) FROM label l
JOIN segment s ON l.segment_id = s.id
WHERE s.file_id = ? AND l.active = true
`, matchedID).Scan(&existingLabels)
		if countErr != nil {
			reject(utils.StageValidation, fmt.Sprintf("failed to check existing labels: %v", countErr))
			continue
		}
		if existingLabels > 0 {
			reject(utils.StageValidation, fmt.Sprintf("file already has %d label(s) - fresh imports only", existingLabels))
			continue
		}

		accepted[matchedID] = candidate
	}

	return accepted, problems
}

// dataFileUpdate holds the values to write back into one .data file after a
// successful import.
type dataFileUpdate struct {
// DataPath is the .data file to rewrite.
DataPath string
// WavHash is stored in the file's metadata under "skraak_hash".
WavHash string
// LabelIDs holds the generated label IDs, addressed by the position of the
// segment and of the label within that segment.
LabelIDs map[int]map[int]string // segmentIndex -> labelIndex -> labelID
}

// importSegmentsIntoDB writes the segments of every validated file inside a
// single logged transaction ("import_segments").
//
// For each file in fileIDMap with a non-empty FileID it reports progress,
// imports each segment via importSegment, and collects the generated label IDs
// for the later .data write-back. A segment that ends up with no labels is
// deleted again. Per-segment failures are accumulated, not fatal.
//
// Returns the imported segments, the number of labels and subtypes imported,
// the per-file write-back data, and all collected errors. If the transaction
// cannot be started or committed, only the errors are returned.
func importSegmentsIntoDB(
	ctx context.Context,
	database *sql.DB,
	fileIDMap map[string]scannedDataFile,
	scannedFiles []scannedDataFile,
	mapping utils.MappingFile,
	filterIDMap map[string]string,
	speciesIDMap map[string]string,
	calltypeIDMap map[string]map[string]string,
	datasetID string,
	progressHandler func(processed, total int, message string),
) ([]SegmentImport, int, int, []dataFileUpdate, []ImportSegmentError) {
	var (
		segments     []SegmentImport
		problems     []ImportSegmentError
		updates      []dataFileUpdate
		labelCount   int
		subtypeCount int
	)

	tx, err := db.BeginLoggedTx(ctx, database, "import_segments")
	if err != nil {
		problems = append(problems, ImportSegmentError{
			Stage:   utils.StageImport,
			Message: fmt.Sprintf("failed to begin transaction: %v", err),
		})
		return nil, 0, 0, nil, problems
	}
	// Undo everything unless the commit below is reached.
	defer tx.Rollback()

	total := len(fileIDMap)
	done := 0

	for _, file := range fileIDMap {
		if file.FileID == "" {
			continue
		}

		done++
		if progressHandler != nil {
			progressHandler(done, total, filepath.Base(file.DataPath))
		}

		update := dataFileUpdate{
			DataPath: file.DataPath,
			WavHash:  file.WavHash,
			LabelIDs: make(map[int]map[int]string),
		}

		for idx, segment := range file.Segments {
			imported, ids, subtypes, segProblems := importSegment(ctx, tx, segment, idx, file, datasetID, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
			problems = append(problems, segProblems...)
			subtypeCount += subtypes

			if len(imported.Labels) > 0 {
				segments = append(segments, imported)
				labelCount += len(ids)
				update.LabelIDs[idx] = ids
				continue
			}

			// No label made it in: remove the segment row again.
			if _, delErr := tx.ExecContext(ctx, `DELETE FROM segment WHERE id = ?`, imported.SegmentID); delErr != nil {
				problems = append(problems, ImportSegmentError{
					File: filepath.Base(file.DataPath), Stage: utils.StageImport,
					Message: fmt.Sprintf("failed to delete orphaned segment: %v", delErr),
				})
			}
		}

		updates = append(updates, update)
	}

	if commitErr := tx.Commit(); commitErr != nil {
		problems = append(problems, ImportSegmentError{
			Stage:   utils.StageImport,
			Message: fmt.Sprintf("failed to commit transaction: %v", commitErr),
		})
		return nil, 0, 0, nil, problems
	}

	return segments, labelCount, subtypeCount, updates, problems
}

// countTotalSegments returns the sum of the segment counts of all files in
// fileIDMap; an empty or nil map yields 0.
func countTotalSegments(fileIDMap map[string]scannedDataFile) int {
	var total int
	for id := range fileIDMap {
		total += len(fileIDMap[id].Segments)
	}
	return total
}

// writeIDsToDataFiles records the import results in the .data files
// themselves: each file is re-parsed, "skraak_hash" is set in its metadata,
// "skraak_label_id" is set on every label that was imported, and the file is
// written back to the same path.
//
// Segment or label indexes that no longer exist in the re-parsed file are
// ignored. A file that cannot be parsed or written yields one error and does
// not stop the remaining files.
func writeIDsToDataFiles(fileUpdates []dataFileUpdate) []ImportSegmentError {
	var problems []ImportSegmentError

	for _, update := range fileUpdates {
		dataName := filepath.Base(update.DataPath)

		parsed, parseErr := utils.ParseDataFile(update.DataPath)
		if parseErr != nil {
			problems = append(problems, ImportSegmentError{
				File:    dataName,
				Stage:   utils.StageImport,
				Message: fmt.Sprintf("failed to re-parse .data file for writing: %v", parseErr),
			})
			continue
		}

		// File-level marker: the WAV hash.
		if parsed.Meta.Extra == nil {
			parsed.Meta.Extra = make(map[string]any)
		}
		parsed.Meta.Extra["skraak_hash"] = update.WavHash

		// Label-level markers: the generated label IDs.
		for segIdx, idsByLabel := range update.LabelIDs {
			if segIdx >= len(parsed.Segments) {
				continue
			}
			segment := parsed.Segments[segIdx]
			for labelIdx, id := range idsByLabel {
				if labelIdx >= len(segment.Labels) {
					continue
				}
				target := segment.Labels[labelIdx]
				if target.Extra == nil {
					target.Extra = make(map[string]any)
				}
				target.Extra["skraak_label_id"] = id
			}
		}

		if writeErr := parsed.Write(update.DataPath); writeErr != nil {
			problems = append(problems, ImportSegmentError{
				File:    dataName,
				Stage:   utils.StageImport,
				Message: fmt.Sprintf("failed to write updated .data file: %v", writeErr),
			})
		}
	}

	return problems
}
if seg.EndTime > sf.Duration {
errors = append(errors, ImportSegmentError{
File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
Message: fmt.Sprintf("segment end time (%.2f) exceeds file duration (%.2f)", seg.EndTime, sf.Duration),
})
return SegmentImport{}, nil, 0, errors
}

segmentID, err := utils.GenerateLongID()
if err != nil {
errors = append(errors, ImportSegmentError{
File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
Message: fmt.Sprintf("failed to generate segment ID: %v", err),
})
return SegmentImport{}, nil, 0, errors
}

_, err = tx.ExecContext(ctx, `
INSERT INTO segment (id, file_id, dataset_id, start_time, end_time, freq_low, freq_high, created_at, last_modified, active)
VALUES (?, ?, ?, ?, ?, ?, ?, now(), now(), true)
`, segmentID, sf.FileID, datasetID, seg.StartTime, seg.EndTime, seg.FreqLow, seg.FreqHigh)
if err != nil {
errors = append(errors, ImportSegmentError{
File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
Message: fmt.Sprintf("failed to insert segment: %v", err),
})
return SegmentImport{}, nil, 0, errors
}

segImport := SegmentImport{
SegmentID: segmentID,
FileName: filepath.Base(sf.WavPath),
StartTime: seg.StartTime,
EndTime: seg.EndTime,
FreqLow: seg.FreqLow,
FreqHigh: seg.FreqHigh,
Labels: make([]LabelImport, 0),
}
labelIDs := make(map[int]string)
var subtypesImported int

for labelIdx, label := range seg.Labels {
result := importSingleLabel(ctx, tx, label, segmentID, segIdx, labelIdx, sf, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
if result.hasError {
errors = append(errors, result.err)
continue
}
labelIDs[labelIdx] = result.labelID
segImport.Labels = append(segImport.Labels, result.labelImport)
subtypesImported += result.subtypesImported
}

return segImport, labelIDs, subtypesImported, errors
}

// importSegment inserts a single segment and its labels into the DB.
func importSegment(
ctx context.Context,
tx *db.LoggedTx,
seg *utils.Segment,
segIdx int,
sf scannedDataFile,
datasetID string,
mapping utils.MappingFile,
filterIDMap map[string]string,
speciesIDMap map[string]string,
calltypeIDMap map[string]map[string]string,
) (SegmentImport, map[int]string, int, []ImportSegmentError) {
var errors []ImportSegmentError

if seg.StartTime >= seg.EndTime {
errors = append(errors, ImportSegmentError{
File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
Message: fmt.Sprintf("invalid segment bounds: start=%.2f >= end=%.2f", seg.StartTime, seg.EndTime),
})
return SegmentImport{}, nil, 0, errors
}
}

// importLabelResult holds the result of importing a single label. Either
// hasError is true and err describes the failure, or the remaining fields
// describe the label that was inserted.
type importLabelResult struct {
// labelImport is the output record for the inserted label.
labelImport LabelImport
// labelID is the ID generated for the inserted label row.
labelID string
// subtypesImported is 1 when a label_subtype row was inserted, otherwise 0.
subtypesImported int
// err is only meaningful when hasError is true.
err ImportSegmentError
// hasError reports whether the import of this label failed.
hasError bool
}

// importSingleLabel inserts a single label and its metadata/subtype into the DB.
//
// The label's species is translated through mapping and resolved via
// speciesIDMap; its filter is resolved via filterIDMap. A label row is then
// inserted, followed by a label_metadata row when the label has a comment and
// a label_subtype row when it has a calltype.
//
// On any failure the result has hasError set and err filled in; rows already
// written for this label are not removed here. On success the result carries
// the output record, the generated label ID, and subtypesImported (1 when a
// calltype was stored).
func importSingleLabel(
	ctx context.Context,
	tx *db.LoggedTx,
	label *utils.Label,
	segmentID string,
	segIdx, labelIdx int,
	sf scannedDataFile,
	mapping utils.MappingFile,
	filterIDMap map[string]string,
	speciesIDMap map[string]string,
	calltypeIDMap map[string]map[string]string,
) importLabelResult {
	// fail builds the error result for this label's data file.
	fail := func(message string) importLabelResult {
		return importLabelResult{err: ImportSegmentError{
			File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
			Message: message,
		}, hasError: true}
	}

	dbSpecies, ok := mapping.GetDBSpecies(label.Species)
	if !ok {
		return fail(fmt.Sprintf("species not found in mapping: %s", label.Species))
	}

	speciesID, ok := speciesIDMap[dbSpecies]
	if !ok {
		return fail(fmt.Sprintf("species ID not found: %s", dbSpecies))
	}

	filterID, ok := filterIDMap[label.Filter]
	if !ok {
		return fail(fmt.Sprintf("filter ID not found: %s", label.Filter))
	}

	labelID, err := utils.GenerateLongID()
	if err != nil {
		return fail(fmt.Sprintf("failed to generate label ID: %v", err))
	}

	_, err = tx.ExecContext(ctx, `
INSERT INTO label (id, segment_id, species_id, filter_id, certainty, created_at, last_modified, active)
VALUES (?, ?, ?, ?, ?, now(), now(), true)
`, labelID, segmentID, speciesID, filterID, label.Certainty)
	if err != nil {
		return fail(fmt.Sprintf("failed to insert label: %v", err))
	}

	// Insert label_metadata if comment exists
	if label.Comment != "" {
		// Escape the full set of characters JSON forbids inside a string, so a
		// comment containing a backslash or a line break still yields valid JSON.
		metadataJSON := `{"comment": "` + escapeJSONString(label.Comment) + `"}`
		if _, err := tx.ExecContext(ctx, `
INSERT INTO label_metadata (label_id, json, created_at, last_modified, active)
VALUES (?, ?, now(), now(), true)
`, labelID, metadataJSON); err != nil {
			return fail(fmt.Sprintf("failed to insert label_metadata: %v", err))
		}
	}

	labelImport := LabelImport{
		LabelID:   labelID,
		Species:   dbSpecies,
		Filter:    label.Filter,
		Certainty: label.Certainty,
		Comment:   label.Comment,
	}

	// Insert label_subtype if calltype exists
	if label.CallType != "" {
		if err := importCalltype(ctx, tx, labelID, label, dbSpecies, filterID, mapping, calltypeIDMap, sf); err != nil {
			return importLabelResult{err: *err, hasError: true}
		}
		labelImport.CallType = mapping.GetDBCalltype(label.Species, label.CallType)
		return importLabelResult{labelImport: labelImport, labelID: labelID, subtypesImported: 1}
	}

	return importLabelResult{labelImport: labelImport, labelID: labelID}
}

// escapeJSONString returns s with every character that may not appear raw
// inside a JSON string literal replaced by its escape sequence: the double
// quote, the backslash, and all control characters below U+0020.
func escapeJSONString(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	for _, r := range s {
		switch r {
		case '"':
			b.WriteString(`\"`)
		case '\\':
			b.WriteString(`\\`)
		case '\n':
			b.WriteString(`\n`)
		case '\r':
			b.WriteString(`\r`)
		case '\t':
			b.WriteString(`\t`)
		default:
			if r < 0x20 {
				fmt.Fprintf(&b, `\u%04x`, r)
			} else {
				b.WriteRune(r)
			}
		}
	}
	return b.String()
}

// importCalltype inserts a label_subtype row for a calltype label.
//
// The label's calltype is translated through mapping and resolved to an ID via
// calltypeIDMap (keyed by database species, then database calltype). It returns
// nil on success, or a pointer to an ImportSegmentError when the calltype ID is
// unknown, ID generation fails, or the insert fails.
func importCalltype(
ctx context.Context,
tx *db.LoggedTx,
labelID string,
label *utils.Label,
dbSpecies string,
filterID string,
mapping utils.MappingFile,
calltypeIDMap map[string]map[string]string,
sf scannedDataFile,
) *ImportSegmentError {
dbCalltype := mapping.GetDBCalltype(label.Species, label.CallType)

// An absent species entry and an absent calltype entry both leave the ID empty.
calltypeID := ""
if calltypeIDMap[dbSpecies] != nil {
calltypeID = calltypeIDMap[dbSpecies][dbCalltype]
}
if calltypeID == "" {
return &ImportSegmentError{
File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
Message: fmt.Sprintf("calltype ID not found: %s/%s", dbSpecies, dbCalltype),
}
}

subtypeID, err := utils.GenerateLongID()
if err != nil {
return &ImportSegmentError{
File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
Message: fmt.Sprintf("failed to generate label_subtype ID: %v", err),
}
}

// The subtype row reuses the label's filter and certainty.
_, err = tx.ExecContext(ctx, `
INSERT INTO label_subtype (id, label_id, calltype_id, filter_id, certainty, created_at, last_modified, active)
VALUES (?, ?, ?, ?, ?, now(), now(), true)
`, subtypeID, labelID, calltypeID, filterID, label.Certainty)
if err != nil {
return &ImportSegmentError{
File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
Message: fmt.Sprintf("failed to insert label_subtype: %v", err),
}
}
return nil
// NOTE(review): the function's closing brace is not visible here, and the
// statement below does not belong to this function (it uses `input`, which is
// not a parameter). It looks like the database-open line displaced from
// ImportSegments — confirm and move it.
database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
// segmentValidation holds the results of pre-import validation (phases B+C).
type segmentValidation struct {
// scannedFiles are all .data files that were parsed successfully.
scannedFiles []scannedDataFile
// filterIDMap maps a filter name to its filter ID.
filterIDMap map[string]string
// speciesIDMap maps a database species label to its species ID.
speciesIDMap map[string]string
// calltypeIDMap maps database species label -> database calltype label -> calltype ID.
calltypeIDMap map[string]map[string]string
// fileIDMap holds the files that passed file validation, keyed by file ID.
fileIDMap map[string]scannedDataFile
}

// validateAndPrepareSegments runs everything that must succeed before any row
// is written (phases B+C): it scans the .data files, checks the
// dataset/location/cluster hierarchy, resolves filter, species and calltype
// IDs, and decides which files may be imported.
//
// The error slice carries per-file problems and is returned in every case. The
// validation result is nil when no file could be scanned (with a nil error) or
// when a check fails outright (with a non-nil error).
func validateAndPrepareSegments(
	database *sql.DB,
	input ImportSegmentsInput,
	mapping utils.MappingFile,
	dataFiles []string,
) (*segmentValidation, []ImportSegmentError, error) {
	// Phase B: read the files and learn which names they reference.
	scanned, scanProblems, filters, species, calltypes := scanAllDataFiles(dataFiles, input.Folder)
	if len(scanned) == 0 {
		return nil, scanProblems, nil
	}

	// Phase C, step 1: dataset -> location -> cluster must be consistent.
	hierarchyErr := validateSegmentHierarchy(database, input.DatasetID, input.LocationID, input.ClusterID)
	if hierarchyErr != nil {
		return nil, scanProblems, hierarchyErr
	}

	// Step 2: every referenced filter must exist.
	filterIDs, filterErr := validateFiltersExist(database, filters)
	if filterErr != nil {
		return nil, scanProblems, fmt.Errorf("filter validation failed: %w", filterErr)
	}

	// Step 3: the mapping must cover the referenced species/calltypes.
	report, mappingErr := utils.ValidateMappingAgainstDB(database, mapping, species, calltypes)
	if mappingErr != nil {
		return nil, scanProblems, fmt.Errorf("mapping validation failed: %w", mappingErr)
	}
	if report.HasErrors() {
		return nil, scanProblems, fmt.Errorf("mapping validation failed: %s", report.Error())
	}

	// Step 4: resolve species and calltype IDs.
	speciesIDs, calltypeIDs, loadErr := loadSpeciesCalltypeIDs(database, mapping, species, calltypes)
	if loadErr != nil {
		return nil, scanProblems, fmt.Errorf("failed to load species/calltype IDs: %w", loadErr)
	}

	// Step 5: keep only files known by hash, linked to the dataset and unlabelled.
	importable, fileProblems := validateAndMapFiles(database, scanned, input.ClusterID, input.DatasetID)
	combined := append(scanProblems, fileProblems...)

	prepared := &segmentValidation{
		scannedFiles:  scanned,
		filterIDMap:   filterIDs,
		speciesIDMap:  speciesIDs,
		calltypeIDMap: calltypeIDs,
		fileIDMap:     importable,
	}
	return prepared, combined, nil
}

DBPath string `json:"db_path"`
file deletion: import_files.go (----------)

[6.248737]→[6.351133:351172](∅→∅),[6.351172]→[6.345251:345251](∅→∅)

package tools

import (
"context"
"database/sql"
"fmt"
"os"
"time"

"skraak/db"
"skraak/utils"
)

// ImportAudioFilesInput defines the input parameters for the import_audio_files tool
type ImportAudioFilesInput struct {
// FolderPath is the folder to import from; it must exist and be a directory.
FolderPath string `json:"folder_path"`
// DatasetID, LocationID and ClusterID identify the import target; their
// format and their relationships are validated before importing.
DatasetID string `json:"dataset_id"`
LocationID string `json:"location_id"`
ClusterID string `json:"cluster_id"`
Recursive *bool `json:"recursive,omitempty"` // *bool because default is true; plain bool would make "not provided" indistinguishable from "false"
}

// ImportAudioFilesOutput defines the output structure for the import_audio_files tool
type ImportAudioFilesOutput struct {
// Summary holds the aggregate counts and timing of the run.
Summary ImportSummary `json:"summary"`
// FileIDs is currently always an empty list; file IDs are not tracked.
FileIDs []string `json:"file_ids"`
// Errors holds the per-file errors reported by the cluster import.
Errors []utils.FileImportError `json:"errors,omitempty"`
}

// ImportSummary provides summary statistics for the import operation. The
// counts and the duration are copied from the cluster import result.
type ImportSummary struct {
TotalFiles int `json:"total_files"`
ImportedFiles int `json:"imported_files"`
SkippedFiles int `json:"skipped_files"` // Duplicates
FailedFiles int `json:"failed_files"`
AudioMothFiles int `json:"audiomoth_files"`
// TotalDuration is serialized as "total_duration_seconds".
TotalDuration float64 `json:"total_duration_seconds"`
// ProcessingTime is the elapsed wall-clock time formatted by time.Duration.String.
ProcessingTime string `json:"processing_time"`
}

// ImportAudioFiles batch imports WAV files from a folder with hash-based duplicate detection.
//
// The visible steps are: default Recursive to true, validate the input, open
// the database, make sure the cluster has a path, run the cluster import, and
// map its result into ImportAudioFilesOutput.
//
// NOTE(review): several statements of this function are not visible in place
// (see the notes below); they appear displaced further down in this listing.
// Confirm the original order before relying on this block.
func ImportAudioFiles(
ctx context.Context,
input ImportAudioFilesInput,
) (ImportAudioFilesOutput, error) {
startTime := time.Now()
var output ImportAudioFilesOutput

// Default recursive to true
recursive := true
if input.Recursive != nil {
recursive = *input.Recursive
}

// Validate database hierarchy (dataset → location → cluster)
// NOTE(review): the `if err := validateImportInput(...)` header that this
// return and closing brace belong to is not visible here.
return output, fmt.Errorf("validation failed: %w", err)
}

// Open database
// NOTE(review): the statement that opens `database` is not visible here.
if err != nil {
return output, fmt.Errorf("failed to open database: %w", err)
}
defer database.Close()

// Set cluster path if empty
err = utils.EnsureClusterPath(database, input.ClusterID, input.FolderPath)
if err != nil {
return output, fmt.Errorf("failed to set cluster path: %w", err)
}

// Import the cluster (ALL THE LOGIC IS HERE)
// NOTE(review): the transaction start and the head of the
// utils.ImportCluster(...) call that these fields are arguments of are not
// visible here; neither is the commit that presumably follows — confirm.
FolderPath: input.FolderPath,
DatasetID: input.DatasetID,
LocationID: input.LocationID,
ClusterID: input.ClusterID,
Recursive: recursive,
})
if err != nil {
return output, fmt.Errorf("cluster import failed: %w", err)
}

// Map to output format
output = ImportAudioFilesOutput{
Summary: ImportSummary{
TotalFiles: clusterOutput.TotalFiles,
ImportedFiles: clusterOutput.ImportedFiles,
SkippedFiles: clusterOutput.SkippedFiles,
FailedFiles: clusterOutput.FailedFiles,
AudioMothFiles: clusterOutput.AudioMothFiles,
TotalDuration: clusterOutput.TotalDuration,
ProcessingTime: time.Since(startTime).String(),
},
FileIDs: []string{}, // File IDs not tracked currently
Errors: clusterOutput.Errors,
}

return output, nil
}

// validateImportInput checks that input.FolderPath can be stat'ed and is a
// directory, then delegates the ID and relationship checks to
// validateHierarchyIDs using the database at dbPath.
func validateImportInput(input ImportAudioFilesInput, dbPath string) error {
	folderInfo, statErr := os.Stat(input.FolderPath)
	switch {
	case statErr != nil:
		return fmt.Errorf("folder not accessible: %w", statErr)
	case !folderInfo.IsDir():
		return fmt.Errorf("path is not a directory: %s", input.FolderPath)
	}

	return validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, dbPath)
}

// validateHierarchyIDs validates the dataset, location and cluster IDs in two
// stages. The ID formats are checked first, without touching the database.
// Only if all three pass is the database at dbPath opened for reading to run
// the dataset check, the location-in-dataset check and the cluster-in-location
// check. The first failure is returned.
func validateHierarchyIDs(datasetID, locationID, clusterID, dbPath string) error {
	// Stage 1: formats (cheap, no DB).
	if formatErr := utils.ValidateShortID(datasetID, "dataset_id"); formatErr != nil {
		return formatErr
	}
	if formatErr := utils.ValidateShortID(locationID, "location_id"); formatErr != nil {
		return formatErr
	}
	if formatErr := utils.ValidateShortID(clusterID, "cluster_id"); formatErr != nil {
		return formatErr
	}

	// Stage 2: relationships, evaluated against a read-only connection.
	checkRelationships := func(database *sql.DB) error {
		datasetErr := db.ValidateDatasetTypeForImport(database, datasetID)
		if datasetErr != nil {
			return datasetErr
		}

		locationErr := db.ValidateLocationBelongsToDataset(database, locationID, datasetID)
		if locationErr != nil {
			return locationErr
		}

		clusterErr := db.ClusterBelongsToLocation(database, clusterID, locationID)
		if clusterErr != nil {
			return clusterErr
		}

		return nil
	}

	return db.WithReadDB(dbPath, checkRelationships)
}
}

if err := tx.Commit(); err != nil {
return output, fmt.Errorf("transaction commit failed: %w", err)
tx.Rollback()
tx, err := db.BeginLoggedTx(ctx, database, "import_audio_files")
if err != nil {
return output, fmt.Errorf("failed to begin transaction: %w", err)
}

clusterOutput, err := utils.ImportCluster(database, tx.UnderlyingTx(), utils.ClusterImportInput{
database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
if err := validateImportInput(input, resolveDBPath(input.DBPath)); err != nil {
DBPath string `json:"db_path"`
file deletion: import_file.go (----------)

[6.248737]→[6.357911:357949](∅→∅),[6.357949]→[6.351174:351174](∅→∅)

package tools

import (
"context"
"database/sql"
"fmt"
"os"
"path/filepath"
"strings"
"time"

"skraak/db"
"skraak/utils"
)

// ImportFileInput defines the input parameters for the import_file tool
type ImportFileInput struct {
// FilePath is the WAV file to import.
FilePath string `json:"file_path"`
// DatasetID is the dataset the new file row is linked to.
DatasetID string `json:"dataset_id"`
// LocationID selects the location whose latitude, longitude and timezone are
// used when processing the file.
LocationID string `json:"location_id"`
// ClusterID is the cluster the file is stored under.
ClusterID string `json:"cluster_id"`
}

// ImportFileOutput defines the output structure for the import_file tool
type ImportFileOutput struct {
// FileID is the ID of the file row: newly generated, or the existing row's ID
// when the file turned out to be a duplicate.
FileID string `json:"file_id"`
// FileName is the base name of the input path, later replaced by the name
// reported by file processing.
FileName string `json:"file_name"`
// Hash, Duration, SampleRate, TimestampLocal and IsAudioMoth are copied from
// the file processing result.
Hash string `json:"hash"`
Duration float64 `json:"duration_seconds"`
SampleRate int `json:"sample_rate"`
TimestampLocal time.Time `json:"timestamp_local"`
IsAudioMoth bool `json:"is_audiomoth"`
// IsDuplicate reports that a file with the same hash already existed.
IsDuplicate bool `json:"is_duplicate"`
// ProcessingTime is the elapsed wall-clock time formatted by time.Duration.String.
ProcessingTime string `json:"processing_time"`
// Error carries the error text when file processing or database insertion
// fails; nil otherwise.
Error *string `json:"error,omitempty"`
}

// ImportFile imports a single WAV file into the database with duplicate detection.
//
// It validates the path, validates the target hierarchy, reads the location
// data, processes the file, ensures the cluster path is set, and inserts the
// file. When processing or insertion fails, output.Error and
// output.ProcessingTime are filled in before the error is returned.
//
// NOTE(review): two statements of this function are not visible in place (see
// the notes below). Confirm the original order before relying on this block.
func ImportFile(
ctx context.Context,
input ImportFileInput,
) (ImportFileOutput, error) {
startTime := time.Now()
var output ImportFileOutput

// Phase 1: Validate file path
_, err := validateFilePath(input.FilePath)
if err != nil {
return output, fmt.Errorf("file validation failed: %w", err)
}
output.FileName = filepath.Base(input.FilePath)

// Phase 2: Validate database hierarchy
// NOTE(review): the `if` header that this return and closing brace belong to
// is not visible here.
return output, fmt.Errorf("hierarchy validation failed: %w", err)
}

// Phase 3: Open database connection (single connection for all DB operations)
// NOTE(review): the statement that opens `database` is not visible here.
if err != nil {
return output, fmt.Errorf("database connection failed: %w", err)
}
defer database.Close()

// Phase 4: Get location data for astronomical calculations
locData, err := utils.GetLocationData(database, input.LocationID)
if err != nil {
return output, fmt.Errorf("failed to get location data: %w", err)
}

// Phase 5: Process file metadata
result, err := utils.ProcessSingleFile(input.FilePath, locData.Latitude, locData.Longitude, locData.TimezoneID, true)
if err != nil {
// Report the failure in the output as well as through the returned error.
errMsg := err.Error()
output.Error = &errMsg
output.ProcessingTime = time.Since(startTime).String()
return output, fmt.Errorf("file processing failed: %w", err)
}

// Populate output with extracted metadata
output.FileName = result.FileName
output.Hash = result.Hash
output.Duration = result.Duration
output.SampleRate = result.SampleRate
output.TimestampLocal = result.TimestampLocal
output.IsAudioMoth = result.IsAudioMoth

// Phase 6: Ensure cluster path is set
// The file's parent directory is passed as the cluster path.
if err := utils.EnsureClusterPath(database, input.ClusterID, filepath.Dir(input.FilePath)); err != nil {
return output, fmt.Errorf("failed to set cluster path: %w", err)
}

// Phase 7: Insert into database
fileID, isDuplicate, err := insertFileIntoDB(ctx, database, result, input.DatasetID, input.ClusterID, input.LocationID)
if err != nil {
errMsg := err.Error()
output.Error = &errMsg
output.ProcessingTime = time.Since(startTime).String()
return output, fmt.Errorf("database insertion failed: %w", err)
}

output.FileID = fileID
output.IsDuplicate = isDuplicate
output.ProcessingTime = time.Since(startTime).String()

return output, nil
}

// validateFilePath returns the file info for filePath when it names an
// existing, regular, non-empty file whose extension is ".wav" in any letter
// case. Otherwise it returns nil and an error describing the first check that
// failed, in this order: existence/access, regular file, extension, size.
func validateFilePath(filePath string) (os.FileInfo, error) {
	info, statErr := os.Stat(filePath)
	switch {
	case statErr != nil && os.IsNotExist(statErr):
		return nil, fmt.Errorf("file does not exist: %s", filePath)
	case statErr != nil:
		return nil, fmt.Errorf("cannot access file: %w", statErr)
	case !info.Mode().IsRegular():
		return nil, fmt.Errorf("path is not a regular file: %s", filePath)
	}

	// The extension is compared lower-cased, so ".WAV" is accepted too.
	if ext := strings.ToLower(filepath.Ext(filePath)); ext != ".wav" {
		return nil, fmt.Errorf("file must be a WAV file (got extension: %s)", ext)
	}

	if info.Size() == 0 {
		return nil, fmt.Errorf("file is empty: %s", filePath)
	}

	return info, nil
}

// insertFileIntoDB stores one processed audio file inside a single logged
// transaction opened with db.BeginLoggedTx.
//
// It first asks utils.CheckDuplicateHash whether result.Hash is already
// known; if so, the existing ID is returned with isDuplicate=true and nothing
// is written. Otherwise it generates a new ID, inserts the file row, links it
// to datasetID via file_dataset, and — when the result is flagged as
// AudioMoth and carries MothData — inserts a moth_metadata row.
//
// Returns (fileID, isDuplicate, error). Any early return leaves the
// transaction to the deferred Rollback.
func insertFileIntoDB(
	ctx context.Context,
	database *sql.DB,
	result *utils.FileProcessingResult,
	datasetID, clusterID, locationID string,
) (string, bool, error) {
	// SQL statements executed below, in order of use.
	const (
		fileInsertSQL = `
INSERT INTO file (
id, file_name, xxh64_hash, location_id, timestamp_local,
cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,
moon_phase, created_at, last_modified, active
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)
`
		datasetLinkSQL = `
INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)
VALUES (?, ?, now(), now())
`
		mothInsertSQL = `
INSERT INTO moth_metadata (
file_id, timestamp, recorder_id, gain, battery_v, temp_c,
created_at, last_modified, active
) VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)
`
	)

	tx, err := db.BeginLoggedTx(ctx, database, "import_audio_file")
	if err != nil {
		return "", false, fmt.Errorf("failed to begin transaction: %w", err)
	}
	defer tx.Rollback() // Rollback if not committed

	// A file whose hash is already recorded is reported, not re-inserted.
	dupID, found, err := utils.CheckDuplicateHash(tx, result.Hash)
	if err != nil {
		return "", false, err
	}
	if found {
		return dupID, true, nil
	}

	newID, err := utils.GenerateLongID()
	if err != nil {
		return "", false, fmt.Errorf("ID generation failed: %w", err)
	}

	astro := result.AstroData
	if _, err := tx.ExecContext(ctx, fileInsertSQL,
		newID, result.FileName, result.Hash, locationID,
		result.TimestampLocal, clusterID, result.Duration, result.SampleRate,
		astro.SolarNight, astro.CivilNight, astro.MoonPhase,
	); err != nil {
		return "", false, fmt.Errorf("file insert failed: %w", err)
	}

	if _, err := tx.ExecContext(ctx, datasetLinkSQL, newID, datasetID); err != nil {
		return "", false, fmt.Errorf("file_dataset insert failed: %w", err)
	}

	// AudioMoth recordings additionally get a moth_metadata row.
	if moth := result.MothData; result.IsAudioMoth && moth != nil {
		if _, err := tx.ExecContext(ctx, mothInsertSQL,
			newID,
			moth.Timestamp,
			&moth.RecorderID,
			&moth.Gain,
			&moth.BatteryV,
			&moth.TempC,
		); err != nil {
			return "", false, fmt.Errorf("moth_metadata insert failed: %w", err)
		}
	}

	if err := tx.Commit(); err != nil {
		return "", false, fmt.Errorf("transaction commit failed: %w", err)
	}

	return newID, false, nil
}
database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
if err := validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, resolveDBPath(input.DBPath)); err != nil {
DBPath string `json:"db_path"`
file deletion: calls_summarise.go (----------)

[6.248737]→[6.400201:400243](∅→∅),[6.400243]→[6.392542:392542](∅→∅)

package tools

import (
"sort"
"strings"

"skraak/utils"
)

// CallsSummariseInput defines the input for the calls-summarise tool.
type CallsSummariseInput struct {
	// Folder is the directory searched for .data files.
	Folder string `json:"folder"`
	// Brief, when true, suppresses the per-segment list; only aggregate
	// statistics are produced.
	Brief bool `json:"brief"`
	// Filter, when non-empty, restricts the summary to labels whose Filter
	// field equals this value; segments with no such label are skipped.
	Filter string `json:"filter,omitempty"`
}

// CallsSummariseOutput defines the output for the calls-summarise tool.
type CallsSummariseOutput struct {
	// Segments lists every summarised segment; left empty in brief mode.
	Segments []SegmentSummary `json:"segments"`
	// Folder echoes the input folder.
	Folder string `json:"folder"`
	// DataFilesRead counts the .data files that parsed successfully.
	DataFilesRead int `json:"data_files_read"`
	// DataFilesSkipped holds the paths of .data files that failed to parse.
	DataFilesSkipped []string `json:"data_files_skipped"`
	// TotalSegments is len(Segments) in full mode, or the sum of the
	// per-filter segment counts in brief mode.
	TotalSegments int `json:"total_segments"`
	// Filters maps a filter name to its statistics.
	Filters map[string]FilterStats `json:"filters"`
	// ReviewStatus aggregates review-progress counters over all labels.
	ReviewStatus ReviewStatus `json:"review_status"`
	// Operators and Reviewers are the sorted, de-duplicated names found in
	// the files' metadata.
	Operators []string `json:"operators"`
	Reviewers []string `json:"reviewers"`
	// Error carries the error message when locating .data files fails.
	Error *string `json:"error,omitempty"`
}

// SegmentSummary represents a single segment in the output.
type SegmentSummary struct {
	// File is the .data file's name with the ".data" suffix removed.
	File string `json:"file"`
	// StartTime and EndTime are copied from the parsed segment.
	StartTime float64 `json:"start_time"`
	EndTime float64 `json:"end_time"`
	// Labels holds the segment's labels after any filter has been applied.
	Labels []LabelSummary `json:"labels"`
}

// LabelSummary represents a label in the output. CallType, Comment and
// Bookmark are tagged omitempty, so they are left out of the JSON when empty
// or false.
type LabelSummary struct {
	Filter string `json:"filter"`
	Certainty int `json:"certainty"`
	Species string `json:"species"`
	CallType string `json:"calltype,omitempty"`
	Comment string `json:"comment,omitempty"`
	Bookmark bool `json:"bookmark,omitempty"`
}

// FilterStats contains per-filter statistics.
type FilterStats struct {
	// Segments is the count attributed to this filter.
	Segments int `json:"segments"`
	// Species maps a species name to its count under this filter.
	Species map[string]int `json:"species"`
	// Calltypes is nested as species -> calltype -> count; it is omitted from
	// the JSON when empty.
	Calltypes map[string]map[string]int `json:"calltypes,omitempty"` // species -> calltype -> count
}

// ReviewStatus contains review progress statistics. Unreviewed, Confirmed and
// DontKnow partition the labels by certainty; the remaining counters are
// independent of certainty.
type ReviewStatus struct {
	Unreviewed int `json:"unreviewed"` // certainty is neither 0 nor 100
	Confirmed int `json:"confirmed"` // certainty = 100
	DontKnow int `json:"dont_know"` // certainty = 0
	WithCallType int `json:"with_calltype"` // labels with a non-empty call type
	WithComments int `json:"with_comments"` // labels with a non-empty comment
	Bookmarked int `json:"bookmarked"` // labels with the bookmark flag set
}

// CallsSummarise reads all .data files in input.Folder and produces a summary.
//
// The output's slices and maps are always initialised so they encode as empty
// JSON arrays/objects rather than null. If locating the .data files fails, the
// error is returned and its message is also stored in output.Error. A folder
// with no .data files yields an empty summary and a nil error.
func CallsSummarise(input CallsSummariseInput) (CallsSummariseOutput, error) {
	var output CallsSummariseOutput

	// Find all .data files
	filePaths, err := utils.FindDataFiles(input.Folder)
	if err != nil {
		errMsg := err.Error()
		output.Error = &errMsg
		return output, err
	}

	// Initialize empty slices/maps (avoid null in JSON)
	output.Segments = make([]SegmentSummary, 0)
	output.Folder = input.Folder
	output.Filters = make(map[string]FilterStats)
	output.Operators = make([]string, 0)
	output.Reviewers = make([]string, 0)
	output.DataFilesSkipped = make([]string, 0)

	if len(filePaths) == 0 {
		return output, nil
	}

	// Track unique operators and reviewers
	operatorSet := make(map[string]bool)
	reviewerSet := make(map[string]bool)

	// Parse every file and accumulate segments and statistics. This has to
	// run before the totals are computed and before the sets are finalised;
	// without it every counter below stays at zero.
	summariseFiles(filePaths, input, &output, operatorSet, reviewerSet)

	// Count segments for total. Brief mode keeps no per-segment list, so the
	// total is derived from the per-filter counters instead.
	if input.Brief {
		for _, fs := range output.Filters {
			output.TotalSegments += fs.Segments
		}
	} else {
		output.TotalSegments = len(output.Segments)
	}

	finaliseSummary(&output, operatorSet, reviewerSet, input.Brief)

	return output, nil
}

// summariseFiles processes all data files, populating output stats
func summariseFiles(filePaths []string, input CallsSummariseInput, output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool) {
for _, path := range filePaths {
df, err := utils.ParseDataFile(path)
if err != nil {
output.DataFilesSkipped = append(output.DataFilesSkipped, path)
continue
}

output.DataFilesRead++
trackMeta(df.Meta, operatorSet, reviewerSet)

var relPath string
if !input.Brief {
relPath = extractRelativePath(input.Folder, path)
}

for _, seg := range df.Segments {
filteredLabels := filterLabels(seg.Labels, input.Filter)
if input.Filter != "" && len(filteredLabels) == 0 {
continue
}

updateStatsFromLabels(filteredLabels, output)

if !input.Brief {
output.Segments = append(output.Segments, SegmentSummary{
File: relPath,
StartTime: seg.StartTime,
EndTime: seg.EndTime,
Labels: buildLabelSummaries(filteredLabels),
})
}

// trackMeta adds the operator and reviewer named in meta, when non-empty, to
// operatorSet and reviewerSet respectively. A nil meta is ignored.
func trackMeta(meta *utils.DataMeta, operatorSet, reviewerSet map[string]bool) {
	if meta == nil {
		return
	}
	if name := meta.Operator; name != "" {
		operatorSet[name] = true
	}
	if name := meta.Reviewer; name != "" {
		reviewerSet[name] = true
	}
}

// filterLabels selects the labels whose Filter field equals filter. An empty
// filter disables selection and the input slice is returned unchanged. When a
// filter is given and nothing matches, the result is a nil slice.
func filterLabels(labels []*utils.Label, filter string) []*utils.Label {
	if filter == "" {
		return labels
	}
	var matched []*utils.Label
	for i := range labels {
		if labels[i].Filter != filter {
			continue
		}
		matched = append(matched, labels[i])
	}
	return matched
}

// buildLabelSummaries converts labels into their LabelSummary form, copying
// Filter, Certainty, Species, CallType, Comment and Bookmark one-to-one.
//
// The result is always a non-nil slice, so a segment without labels encodes
// as "labels": [] rather than "labels": null in the JSON output.
func buildLabelSummaries(labels []*utils.Label) []LabelSummary {
	// Length is known up front; allocating here also guarantees non-nil.
	summaries := make([]LabelSummary, 0, len(labels))
	for _, l := range labels {
		// Empty optional fields are simply copied as zero values; the
		// omitempty tags on LabelSummary drop them when encoding.
		summaries = append(summaries, LabelSummary{
			Filter:    l.Filter,
			Certainty: l.Certainty,
			Species:   l.Species,
			CallType:  l.CallType,
			Comment:   l.Comment,
			Bookmark:  l.Bookmark,
		})
	}
	return summaries
}

// updateStatsFromLabels feeds every label, in order, through
// updateFilterStats and then updateReviewStatus so that both sets of
// counters in output reflect it.
func updateStatsFromLabels(labels []*utils.Label, output *CallsSummariseOutput) {
	for i := range labels {
		label := labels[i]
		updateFilterStats(label, output)
		updateReviewStatus(label, output)
	}
}

// updateFilterStats increments filter-level statistics for a single label.
//
// The FilterStats entry for l.Filter is created on first use. Each call adds
// one to the filter's Segments counter and to the count for l.Species; when
// the label has a call type, the species -> calltype count is bumped as well.
func updateFilterStats(l *utils.Label, output *CallsSummariseOutput) {
	fs, exists := output.Filters[l.Filter]
	if !exists {
		fs = FilterStats{
			Segments:  0,
			Species:   make(map[string]int),
			Calltypes: make(map[string]map[string]int),
		}
	}

	// Core counters: without these the per-filter totals (and the brief-mode
	// TotalSegments derived from them) would always be zero.
	fs.Segments++
	fs.Species[l.Species]++

	if l.CallType != "" {
		if fs.Calltypes[l.Species] == nil {
			fs.Calltypes[l.Species] = make(map[string]int)
		}
		fs.Calltypes[l.Species][l.CallType]++
	}

	// FilterStats is stored by value, so write the updated copy back.
	output.Filters[l.Filter] = fs
}

// updateReviewStatus increments review status counters for a single label
func updateReviewStatus(l *utils.Label, output *CallsSummariseOutput) {
switch l.Certainty {
case 100:
output.ReviewStatus.Confirmed++
case 0:
output.ReviewStatus.DontKnow++
default:
output.ReviewStatus.Unreviewed++
}
if l.CallType != "" {
output.ReviewStatus.WithCallType++
}
if l.Comment != "" {
output.ReviewStatus.WithComments++
}
if l.Bookmark {
output.ReviewStatus.Bookmarked++
}

// finaliseSummary puts output into its final, deterministic shape:
//   - filters whose Calltypes map is empty have it set to nil;
//   - the operator and reviewer sets are appended to output.Operators and
//     output.Reviewers, which are then sorted;
//   - unless brief is set, segments are ordered by file and then start time.
func finaliseSummary(output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool, brief bool) {
	// FilterStats is stored by value, so a modified copy must be written back.
	for name := range output.Filters {
		stats := output.Filters[name]
		if len(stats.Calltypes) != 0 {
			continue
		}
		stats.Calltypes = nil
		output.Filters[name] = stats
	}

	// Map iteration order is random; sorting afterwards makes it stable.
	for name := range operatorSet {
		output.Operators = append(output.Operators, name)
	}
	for name := range reviewerSet {
		output.Reviewers = append(output.Reviewers, name)
	}
	sort.Strings(output.Operators)
	sort.Strings(output.Reviewers)

	if brief {
		return
	}

	segs := output.Segments
	sort.Slice(segs, func(a, b int) bool {
		if segs[a].File == segs[b].File {
			return segs[a].StartTime < segs[b].StartTime
		}
		return segs[a].File < segs[b].File
	})
}

// extractRelativePath derives the audio file name from a .data file path by
// keeping what follows the last "/" and dropping a trailing ".data".
// e.g., "/folder/tx51_LISTENING_20260221_203004.WAV.data" -> "tx51_LISTENING_20260221_203004.WAV"
// The remaining extension keeps its original case. The folder argument is
// not consulted.
func extractRelativePath(folder, dataPath string) string {
	// LastIndex yields -1 when there is no separator, so +1 starts at 0 and
	// the whole path is treated as the file name.
	base := dataPath[strings.LastIndex(dataPath, "/")+1:]
	return strings.TrimSuffix(base, ".data")
}
}
fs.Segments++
fs.Species[l.Species]++
}
}
}

summariseFiles(filePaths, input, &output, operatorSet, reviewerSet)
file deletion: calls_show_images.go (----------)

[6.248737]→[6.403517:403561](∅→∅),[6.403561]→[6.400245:400245](∅→∅)

package tools

import (
"fmt"
"os"
"strings"

"skraak/utils"
)

// CallsShowImagesInput defines the input for the show-images tool.
type CallsShowImagesInput struct {
	// DataFilePath is the .data file to display; the WAV path is derived by
	// stripping the ".data" suffix.
	DataFilePath string `json:"data_file_path"`
	// Color is passed through to the spectrogram generator.
	Color bool `json:"color"`
	// ImageSize is the requested image size; 0 selects
	// utils.SpectrogramDisplaySize.
	ImageSize int `json:"image_size"`
	// Sixel and ITerm select the terminal graphics protocol. ITerm takes
	// precedence over Sixel; with neither set, the Kitty protocol is used.
	Sixel bool `json:"sixel"`
	ITerm bool `json:"iterm"`
}

// CallsShowImagesOutput defines the output for the show-images tool.
type CallsShowImagesOutput struct {
	// SegmentsShown is the segment count reported by CallsShowImages on success.
	SegmentsShown int `json:"segments_shown"`
	// WavFile is the WAV path derived from the .data file path.
	WavFile string `json:"wav_file"`
	// Error holds the failure message, mirroring the returned error.
	Error string `json:"error,omitempty"`
}

// CallsShowImages reads a .data file and displays a spectrogram image for
// each of its segments.
//
// The WAV path is derived by stripping the ".data" suffix from
// input.DataFilePath. Both files must exist and the .data file must contain
// at least one segment. For each segment a one-line description is written to
// stderr and the image is written to stdout using the selected terminal
// graphics protocol (ITerm, else Sixel, else Kitty).
//
// Segments whose spectrogram cannot be generated are skipped; SegmentsShown
// counts only the segments that were actually written. On failure the message
// is stored in output.Error and also returned as the error.
func CallsShowImages(input CallsShowImagesInput) (CallsShowImagesOutput, error) {
	var output CallsShowImagesOutput

	// Validate file exists
	if _, err := os.Stat(input.DataFilePath); os.IsNotExist(err) {
		output.Error = fmt.Sprintf("File not found: %s", input.DataFilePath)
		return output, fmt.Errorf("%s", output.Error)
	}

	// Derive WAV file path (strip .data suffix)
	wavPath := strings.TrimSuffix(input.DataFilePath, ".data")
	output.WavFile = wavPath

	// Check WAV file exists
	if _, err := os.Stat(wavPath); os.IsNotExist(err) {
		output.Error = fmt.Sprintf("WAV file not found: %s", wavPath)
		return output, fmt.Errorf("%s", output.Error)
	}

	// Parse .data file (includes labels for future filtering)
	dataFile, err := utils.ParseDataFile(input.DataFilePath)
	if err != nil {
		output.Error = err.Error()
		return output, fmt.Errorf("%s", output.Error)
	}

	if len(dataFile.Segments) == 0 {
		output.Error = "No segments found in .data file"
		return output, fmt.Errorf("%s", output.Error)
	}

	// Resolve image size
	imgSize := input.ImageSize
	if imgSize == 0 {
		imgSize = utils.SpectrogramDisplaySize
	}

	// Select graphics protocol
	protocol := utils.ProtocolKitty
	if input.ITerm {
		protocol = utils.ProtocolITerm
	} else if input.Sixel {
		protocol = utils.ProtocolSixel
	}

	// Generate spectrogram for each segment and output
	for i, seg := range dataFile.Segments {
		// Best effort: a segment that cannot be rendered is skipped and not
		// counted as shown.
		img, err := utils.GenerateSegmentSpectrogram(input.DataFilePath, seg.StartTime, seg.EndTime, input.Color, imgSize)
		if err != nil || img == nil {
			continue
		}

		// Print segment info (numbering follows the segment's position in
		// the file, including skipped ones)
		labelInfo := formatSegmentLabels(seg.Labels)
		fmt.Fprintf(os.Stderr, "Segment %d: %.1fs - %.1fs (%.1fs)%s\n",
			i+1, seg.StartTime, seg.EndTime, seg.EndTime-seg.StartTime, labelInfo)

		// Write to stdout via terminal graphics protocol
		if err := utils.WriteImage(img, os.Stdout, protocol); err != nil {
			output.Error = fmt.Sprintf("Failed to write image: %v", err)
			return output, fmt.Errorf("%s", output.Error)
		}
		fmt.Println() // Newline after image

		output.SegmentsShown++
	}

	return output, nil
}

// formatSegmentLabels renders labels as a single display string. Each label
// becomes "Species", optionally followed by "/CallType" and " [Filter]" when
// those fields are non-empty; labels are joined with ", " and the result is
// prefixed with a space. No labels yields the empty string.
func formatSegmentLabels(labels []*utils.Label) string {
	if len(labels) == 0 {
		return ""
	}
	rendered := make([]string, len(labels))
	for i, lab := range labels {
		text := lab.Species
		if lab.CallType != "" {
			text = text + "/" + lab.CallType
		}
		if lab.Filter != "" {
			text = text + " [" + lab.Filter + "]"
		}
		rendered[i] = text
	}
	return " " + strings.Join(rendered, ", ")
}
file deletion: calls_push_certainty_test.go (----------)

[6.248737]→[6.406958:407010](∅→∅),[6.407010]→[6.403563:403563](∅→∅)

package tools

import (
"encoding/json"
"os"
"path/filepath"
"testing"

"skraak/utils"
)

func TestPushCertaintyPromotesMatchingLabels(t *testing.T) {
tempDir := t.TempDir()

// File with two Kiwi segments: certainty=90 and certainty=70
file1 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]], [10, 20, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`
file1Path := filepath.Join(tempDir, "file1.data")
if err := os.WriteFile(file1Path, []byte(file1), 0644); err != nil {
t.Fatal(err)
}

// File with one Tomtit at certainty=90 (must not be promoted when species=Kiwi)
file2 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
file2Path := filepath.Join(tempDir, "file2.data")
if err := os.WriteFile(file2Path, []byte(file2), 0644); err != nil {
t.Fatal(err)
}

result, err := PushCertainty(PushCertaintyConfig{
Folder: tempDir,
Species: "Kiwi",
Reviewer: "TestReviewer",
})
if err != nil {
t.Fatal(err)
}

if result.SegmentsUpdated != 1 {
t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)
}
if result.FilesUpdated != 1 {
t.Errorf("expected 1 file updated, got %d", result.FilesUpdated)
}

// Verify file1: certainty=90 Kiwi → 100, certainty=70 Kiwi → unchanged
df, err := utils.ParseDataFile(file1Path)
if err != nil {
t.Fatal(err)
}
if df.Segments[0].Labels[0].Certainty != 100 {
t.Errorf("expected certainty=100, got %d", df.Segments[0].Labels[0].Certainty)
}
if df.Segments[1].Labels[0].Certainty != 70 {
t.Errorf("expected certainty=70 unchanged, got %d", df.Segments[1].Labels[0].Certainty)
}
if df.Meta.Reviewer != "TestReviewer" {
t.Errorf("expected reviewer=TestReviewer, got %q", df.Meta.Reviewer)
}

// Verify Tomtit file was not modified
df2, err := utils.ParseDataFile(file2Path)
if err != nil {
t.Fatal(err)
}
if df2.Segments[0].Labels[0].Certainty != 90 {
t.Errorf("Tomtit certainty should be unchanged at 90, got %d", df2.Segments[0].Labels[0].Certainty)
}
}

// TestPushCertaintyFilterScope verifies that a Filter in the push config
// limits promotion to labels from that filter: a segment carries two Kiwi
// certainty-90 labels from different filters, and only the "model-a" label
// may be raised to 100.
func TestPushCertaintyFilterScope(t *testing.T) {
	tempDir := t.TempDir()

	// Segment has two labels from different filters, both Kiwi certainty=90
	data := []any{
		map[string]any{"Operator": "test"},
		[]any{0.0, 10.0, 100.0, 1000.0, []any{
			map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-a"},
			map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-b"},
		}},
	}
	// A failed marshal would otherwise write an empty fixture and surface
	// later as a confusing parse error.
	raw, err := json.Marshal(data)
	if err != nil {
		t.Fatalf("marshal fixture: %v", err)
	}
	filePath := filepath.Join(tempDir, "file1.data")
	if err := os.WriteFile(filePath, raw, 0644); err != nil {
		t.Fatal(err)
	}

	// Push only model-a
	result, err := PushCertainty(PushCertaintyConfig{
		Folder:   tempDir,
		Filter:   "model-a",
		Species:  "Kiwi",
		Reviewer: "TestReviewer",
	})
	if err != nil {
		t.Fatal(err)
	}
	if result.SegmentsUpdated != 1 {
		t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)
	}

	// Verify only model-a label was promoted; model-b stays at 90
	df, err := utils.ParseDataFile(filePath)
	if err != nil {
		t.Fatal(err)
	}
	for _, label := range df.Segments[0].Labels {
		if label.Filter == "model-a" && label.Certainty != 100 {
			t.Errorf("model-a label should be 100, got %d", label.Certainty)
		}
		if label.Filter == "model-b" && label.Certainty != 90 {
			t.Errorf("model-b label should be unchanged at 90, got %d", label.Certainty)
		}
	}
}
file deletion: calls_push_certainty.go (----------)

[6.248737]→[6.409526:409573](∅→∅),[6.409573]→[6.407012:407012](∅→∅)

package tools

import (
"fmt"

"skraak/utils"
)

// PushCertaintyConfig holds the configuration for push-certainty.
// All fields except Reviewer are forwarded to LoadDataFiles to select the
// segments in scope.
type PushCertaintyConfig struct {
	// Folder and File identify the .data input passed to LoadDataFiles.
	Folder string
	File string
	// Filter, Species and CallType narrow the labels that are promoted; an
	// empty value places no restriction on that field.
	Filter string
	Species string
	CallType string
	// Night, Day, Lat, Lng and Timezone are passed through unchanged to
	// LoadDataFiles. NOTE(review): presumably these drive time-of-day
	// filtering — confirm against LoadDataFiles.
	Night bool
	Day bool
	Lat float64
	Lng float64
	Timezone string
	// Reviewer is written to the metadata of every file that is modified.
	Reviewer string
}

// PushCertaintyResult holds the result of push-certainty.
type PushCertaintyResult struct {
	// SegmentsUpdated is incremented once for every label promoted to 100.
	SegmentsUpdated int `json:"segments_updated"`
	// FilesUpdated counts the files rewritten because a label changed.
	FilesUpdated int `json:"files_updated"`
	// TimeFilteredCount is copied from the loaded state's TimeFilteredCount.
	TimeFilteredCount int `json:"time_filtered_count"`
}

// PushCertainty promotes all certainty=90 segments matching the filter scope to certainty=100.
// Uses identical filtering logic to LoadDataFiles so the scope matches calls classify exactly.
//
// Every file in which at least one label was promoted has config.Reviewer
// recorded in its metadata and is written back to its original path. The
// first write failure aborts the run and is returned wrapped with the file
// path; files written before that point stay modified.
//
// SegmentsUpdated in the result is incremented once per promoted label.
func PushCertainty(config PushCertaintyConfig) (*PushCertaintyResult, error) {
	state, err := LoadDataFiles(ClassifyConfig{
		Folder:    config.Folder,
		File:      config.File,
		Filter:    config.Filter,
		Species:   config.Species,
		CallType:  config.CallType,
		Certainty: 90,
		Sample:    -1,
		Night:     config.Night,
		Day:       config.Day,
		Lat:       config.Lat,
		Lng:       config.Lng,
		Timezone:  config.Timezone,
	})
	if err != nil {
		return nil, err
	}

	var segsUpdated, filesUpdated int
	for i, df := range state.DataFiles {
		changed := false
		for _, seg := range state.FilteredSegs()[i] {
			for _, label := range seg.Labels {
				// Re-check each label: a selected segment may also carry
				// labels outside the requested scope.
				if labelMatchesPush(label, config.Filter, config.Species, config.CallType) {
					label.Certainty = 100
					changed = true
					segsUpdated++
				}
			}
		}
		if changed {
			// Metadata may be absent on a parsed file; create it rather
			// than dereferencing nil when stamping the reviewer.
			if df.Meta == nil {
				df.Meta = &utils.DataMeta{}
			}
			df.Meta.Reviewer = config.Reviewer
			if err := df.Write(df.FilePath); err != nil {
				return nil, fmt.Errorf("write %s: %w", df.FilePath, err)
			}
			filesUpdated++
		}
	}

	return &PushCertaintyResult{
		SegmentsUpdated:   segsUpdated,
		FilesUpdated:      filesUpdated,
		TimeFilteredCount: state.TimeFilteredCount,
	}, nil
}

// labelMatchesPush reports whether label is one that a push should promote:
// its certainty is exactly 90 and it agrees with each of filter, species and
// callType that is non-empty. The per-label check matters because a segment
// in scope can hold labels from several filters, and only the matching ones
// may be changed.
func labelMatchesPush(label *utils.Label, filter, species, callType string) bool {
	filterOK := filter == "" || label.Filter == filter
	speciesOK := species == "" || label.Species == species
	callTypeOK := callType == "" || label.CallType == callType
	return filterOK && speciesOK && callTypeOK && label.Certainty == 90
}
file deletion: calls_propagate_test.go (----------)

[6.248737]→[6.430676:430723](∅→∅),[6.430723]→[6.409575:409575](∅→∅)

package tools

import (
"path/filepath"
"testing"

"skraak/utils"
)

// helpers

// seg builds a test segment spanning [start, end] with a fixed frequency
// band of 100–8000 and the given labels.
func seg(start, end float64, labels ...*utils.Label) *utils.Segment {
	s := new(utils.Segment)
	s.StartTime, s.EndTime = start, end
	s.FreqLow, s.FreqHigh = 100, 8000
	s.Labels = labels
	return s
}

// lbl builds a test label with the given filter, species, call type and
// certainty; all other fields keep their zero values.
func lbl(filter, species, calltype string, certainty int) *utils.Label {
	l := new(utils.Label)
	l.Filter, l.Species, l.CallType = filter, species, calltype
	l.Certainty = certainty
	return l
}

// writeFile writes segs to "test.data" inside a fresh temporary directory,
// with fixed metadata (operator "ML", reviewer "David", duration 3600), and
// returns the file's path. A write failure aborts the calling test.
func writeFile(t *testing.T, segs ...*utils.Segment) string {
	t.Helper()
	meta := &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600}
	fixture := &utils.DataFile{Meta: meta, Segments: segs}
	target := filepath.Join(t.TempDir(), "test.data")
	if err := fixture.Write(target); err != nil {
		t.Fatalf("write fixture: %v", err)
	}
	return target
}

// readFile parses the .data file at path and returns it, aborting the
// calling test if parsing fails.
func readFile(t *testing.T, path string) *utils.DataFile {
	t.Helper()
	parsed, parseErr := utils.ParseDataFile(path)
	if parseErr == nil {
		return parsed
	}
	t.Fatalf("parse %s: %v", path, parseErr)
	return nil // not reached: Fatalf stops the test
}

// findLabel scans df for a segment whose start and end times equal start and
// end exactly, and returns the first label on such a segment whose Filter
// equals filter. It returns nil when no label qualifies.
func findLabel(df *utils.DataFile, filter string, start, end float64) *utils.Label {
	for _, segment := range df.Segments {
		if segment.StartTime == start && segment.EndTime == end {
			for _, candidate := range segment.Labels {
				if candidate.Filter == filter {
					return candidate
				}
			}
		}
	}
	return nil
}

// Filter names used by the CallsPropagate tests: fFrom is passed as
// FromFilter (the source of propagated labels) and fTo as ToFilter (the
// labels being updated).
const (
	fFrom = "opensoundscape-kiwi-1.2"
	fTo = "opensoundscape-kiwi-1.5"
)

// TestPropagate_HappyPathSingle covers the basic case: one certainty-100
// source label exactly overlapping one certainty-70 target label. The target
// must take the source's species and call type at certainty 90, and the file
// reviewer must become "Skraak".
func TestPropagate_HappyPathSingle(t *testing.T) {
	dataPath := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	in := CallsPropagateInput{File: dataPath, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}

	res, err := CallsPropagate(in)
	if err != nil {
		t.Fatalf("unexpected error: %v (%s)", err, res.Error)
	}
	countsOK := res.Propagated == 1 && res.TargetsExamined == 1 &&
		res.SkippedConflict == 0 && res.SkippedNoOverlap == 0
	if !countsOK {
		t.Fatalf("counts wrong: %+v", res)
	}

	parsed := readFile(t, dataPath)
	got := findLabel(parsed, fTo, 100, 125)
	if got == nil {
		t.Fatal("target label missing")
	}
	updated := got.Species == "Kiwi" && got.CallType == "Male" && got.Certainty == 90
	if !updated {
		t.Errorf("target not updated correctly: species=%q calltype=%q cert=%d", got.Species, got.CallType, got.Certainty)
	}
	if reviewer := parsed.Meta.Reviewer; reviewer != "Skraak" {
		t.Errorf("reviewer = %q, want Skraak", reviewer)
	}
}

// TestPropagate_NoOverlap checks that a target with no time overlap with any
// source is examined, counted as skipped-no-overlap, and left unmodified —
// including the file's reviewer, since nothing is written.
func TestPropagate_NoOverlap(t *testing.T) {
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 0 || out.TargetsExamined != 1 || out.SkippedNoOverlap != 1 {
		t.Fatalf("counts wrong: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 500, 525)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.Certainty != 70 {
		t.Errorf("target should not be modified, cert=%d", target.Certainty)
	}
	if df.Meta.Reviewer != "David" {
		t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
	}
}

// TestPropagate_SourceWrongSpecies_Ignored checks that a certainty-100 source
// label of a different species ("Weka") is not used as a source for "Kiwi":
// the overlapping target is reported as having no overlap.
func TestPropagate_SourceWrongSpecies_Ignored(t *testing.T) {
	dataPath := writeFile(t,
		seg(100, 125, lbl(fFrom, "Weka", "", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	in := CallsPropagateInput{File: dataPath, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}

	res, err := CallsPropagate(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if ok := res.Propagated == 0 && res.SkippedNoOverlap == 1; !ok {
		t.Fatalf("counts wrong: %+v", res)
	}
}

// TestPropagate_SourceWrongCertainty_Ignored checks that source-filter labels
// at certainty 70 or 0 do not act as sources: both overlapping targets end up
// counted as skipped-no-overlap and nothing is propagated.
func TestPropagate_SourceWrongCertainty_Ignored(t *testing.T) {
	dataPath := writeFile(t,
		// Source-filter labels that are not at certainty 100.
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 70)),
		seg(200, 225, lbl(fFrom, "Don't Know", "", 0)),
		// Targets at the same times.
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
		seg(200, 225, lbl(fTo, "Kiwi", "Male", 70)),
	)
	in := CallsPropagateInput{File: dataPath, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}

	res, err := CallsPropagate(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if ok := res.Propagated == 0 && res.SkippedNoOverlap == 2; !ok {
		t.Fatalf("counts wrong: %+v", res)
	}
}

// TestPropagate_SourceWrongFilter_Ignored checks that when the file holds no
// labels from the requested source filter, the result reports FiltersMissing
// with no targets examined and nothing propagated.
func TestPropagate_SourceWrongFilter_Ignored(t *testing.T) {
	dataPath := writeFile(t,
		seg(100, 125, lbl("some-other-filter", "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	in := CallsPropagateInput{File: dataPath, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}

	res, err := CallsPropagate(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if ok := res.FiltersMissing && res.Propagated == 0 && res.TargetsExamined == 0; !ok {
		t.Fatalf("expected FiltersMissing=true with zero counts, got: %+v", res)
	}
}

// TestPropagate_TargetCert100_NotTouched checks that a target already at
// certainty 100 (human-verified) is never examined or overwritten, and that
// the file is not rewritten (the reviewer stays "David").
func TestPropagate_TargetCert100_NotTouched(t *testing.T) {
	dataPath := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Male", 100)),
	)
	in := CallsPropagateInput{File: dataPath, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}

	res, err := CallsPropagate(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if untouched := res.TargetsExamined == 0 && res.Propagated == 0; !untouched {
		t.Fatalf("cert=100 target must not be examined: %+v", res)
	}

	parsed := readFile(t, dataPath)
	if reviewer := parsed.Meta.Reviewer; reviewer != "David" {
		t.Errorf("reviewer should stay David (no write), got %q", reviewer)
	}
}

// TestPropagate_TargetCert90_NotTouched checks that a target already at
// certainty 90 (propagated by an earlier run) is not examined again and keeps
// its existing call type.
func TestPropagate_TargetCert90_NotTouched(t *testing.T) {
	// Target with cert=90 (already propagated earlier) must NOT be re-propagated.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Female", 90)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.TargetsExamined != 0 || out.Propagated != 0 {
		t.Fatalf("cert=90 target must not be examined: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.Certainty != 90 || target.CallType != "Female" {
		t.Errorf("cert=90 target was modified: %+v", target)
	}
}

// TestPropagate_TargetCert0_Propagated checks that certainty-0 targets
// ("Don't Know" / "Noise") are propagated when an overlapping certainty-100
// source exists, so that they are pulled out of the noise bucket and surface
// for review even if occasionally wrong.
func TestPropagate_TargetCert0_Propagated(t *testing.T) {
	dataPath := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Don't Know", "", 0)),
		seg(200, 225, lbl(fFrom, "Kiwi", "Female", 100)),
		seg(200, 225, lbl(fTo, "Noise", "", 0)),
	)
	in := CallsPropagateInput{File: dataPath, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}

	res, err := CallsPropagate(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if both := res.TargetsExamined == 2 && res.Propagated == 2; !both {
		t.Fatalf("cert=0 targets must be propagated: %+v", res)
	}

	// Each target must now mirror its source's call type at certainty 90.
	type expectation struct {
		start, end float64
		calltype   string
	}
	wants := []expectation{
		{100, 125, "Male"},
		{200, 225, "Female"},
	}
	parsed := readFile(t, dataPath)
	for _, w := range wants {
		got := findLabel(parsed, fTo, w.start, w.end)
		ok := got != nil && got.Species == "Kiwi" && got.CallType == w.calltype && got.Certainty == 90
		if !ok {
			t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", w.start, w.end, got, w.calltype)
		}
	}
}

// TestPropagate_MultipleSourcesAgree checks that when several sources overlap
// one target and all carry the same call type, the target is propagated with
// that call type and no conflict is recorded.
func TestPropagate_MultipleSourcesAgree(t *testing.T) {
	// Two overlapping sources with same calltype → propagate.
	path := writeFile(t,
		seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(105, 120, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 1 || out.SkippedConflict != 0 {
		t.Fatalf("counts wrong: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.CallType != "Male" {
		t.Errorf("calltype should be Male, got %q", target.CallType)
	}
}

// TestPropagate_MultipleSourcesConflict checks that when overlapping sources
// disagree on call type, the target is skipped, a single conflict listing
// both source choices is reported, and neither the target nor the file's
// reviewer is modified.
func TestPropagate_MultipleSourcesConflict(t *testing.T) {
	// Two overlapping sources with different calltypes → conflict, skip, report.
	path := writeFile(t,
		seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(115, 120, lbl(fFrom, "Kiwi", "Female", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 0 || out.SkippedConflict != 1 {
		t.Fatalf("expected 1 conflict skip: %+v", out)
	}
	if len(out.Conflicts) != 1 {
		t.Fatalf("expected 1 conflict report, got %d", len(out.Conflicts))
	}
	if out.Conflicts[0].TargetStart != 100 || out.Conflicts[0].TargetEnd != 125 {
		t.Errorf("conflict target wrong: %+v", out.Conflicts[0])
	}
	if len(out.Conflicts[0].SourceChoices) != 2 {
		t.Errorf("expected 2 source choices, got %d", len(out.Conflicts[0].SourceChoices))
	}
	// Target must NOT be modified.
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.CallType != "Duet" || target.Certainty != 70 {
		t.Errorf("conflicted target was modified: %+v", target)
	}
	if df.Meta.Reviewer != "David" {
		t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
	}
}

// TestPropagate_EmptyCallTypePropagates checks that an empty source call type
// is propagated as-is: the target's existing call type is cleared while its
// species and certainty are set as for any other propagation.
func TestPropagate_EmptyCallTypePropagates(t *testing.T) {
	// Source with empty calltype → target gets empty calltype.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Male", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 1 {
		t.Fatalf("expected propagated=1: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.CallType != "" {
		t.Errorf("calltype should be cleared, got %q", target.CallType)
	}
	if target.Species != "Kiwi" || target.Certainty != 90 {
		t.Errorf("target fields wrong: %+v", target)
	}
}

// TestPropagate_SpeciesOverride checks that a target whose species differs
// from the requested species is overwritten with the requested species, the
// source's call type and certainty 90.
func TestPropagate_SpeciesOverride(t *testing.T) {
	// Target species was different from --species; must be overwritten.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Don't Know", "", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 1 {
		t.Fatalf("expected propagated=1: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.Species != "Kiwi" || target.CallType != "Male" || target.Certainty != 90 {
		t.Errorf("target not overwritten correctly: %+v", target)
	}
}

// TestPropagate_OverlapBoundaryExclusive checks that segments which merely
// touch (the source ends exactly where the target starts) are not treated as
// overlapping.
func TestPropagate_OverlapBoundaryExclusive(t *testing.T) {
	dataPath := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	in := CallsPropagateInput{File: dataPath, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}

	res, err := CallsPropagate(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if ok := res.Propagated == 0 && res.SkippedNoOverlap == 1; !ok {
		t.Fatalf("touching boundary must not count as overlap: %+v", res)
	}
}

// TestPropagate_OverlapPartial checks that a partial overlap of one second
// between source and target is sufficient for propagation.
func TestPropagate_OverlapPartial(t *testing.T) {
	dataPath := writeFile(t,
		seg(100, 126, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	in := CallsPropagateInput{File: dataPath, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}

	res, err := CallsPropagate(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := res.Propagated; got != 1 {
		t.Fatalf("expected propagated=1: %+v", res)
	}
}

// TestPropagate_SupersetEitherDirection checks that containment counts as
// overlap in both directions: a source that engulfs the target and a target
// that engulfs the source must each result in one propagation.
func TestPropagate_SupersetEitherDirection(t *testing.T) {
	// Source engulfs target.
	path1 := writeFile(t,
		seg(100, 200, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(110, 150, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	// Errors are reported explicitly rather than surfacing only as a
	// zero-valued count in the message below.
	out1, err := CallsPropagate(CallsPropagateInput{File: path1, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"})
	if err != nil {
		t.Errorf("source-engulfs-target: unexpected error: %v", err)
	}
	if out1.Propagated != 1 {
		t.Errorf("source-engulfs-target: %+v", out1)
	}

	// Target engulfs source.
	path2 := writeFile(t,
		seg(110, 150, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 200, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out2, err := CallsPropagate(CallsPropagateInput{File: path2, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"})
	if err != nil {
		t.Errorf("target-engulfs-source: unexpected error: %v", err)
	}
	if out2.Propagated != 1 {
		t.Errorf("target-engulfs-source: %+v", out2)
	}
}

// TestPropagate_MissingFlags verifies that CallsPropagate rejects an input
// with any one of its four required fields left empty.
func TestPropagate_MissingFlags(t *testing.T) {
	type scenario struct {
		name string
		in   CallsPropagateInput
	}
	scenarios := []scenario{
		{name: "no file", in: CallsPropagateInput{FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}},
		{name: "no from", in: CallsPropagateInput{File: "x", ToFilter: fTo, Species: "Kiwi"}},
		{name: "no to", in: CallsPropagateInput{File: "x", FromFilter: fFrom, Species: "Kiwi"}},
		{name: "no species", in: CallsPropagateInput{File: "x", FromFilter: fFrom, ToFilter: fTo}},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			if _, err := CallsPropagate(sc.in); err == nil {
				t.Errorf("expected error")
			}
		})
	}
}

func TestPropagate_SameFromAndTo(t *testing.T) {
_, err := CallsPropagate(CallsPropagateInput{
File: "x", FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi",
})
if err == nil {
t.Error("expected error when --from == --to")
}
}

func TestPropagate_NonexistentFile(t *testing.T) {
_, err := CallsPropagate(CallsPropagateInput{
File: "/nonexistent/path.data", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err == nil {
t.Error("expected error for nonexistent file")
}
}

// TestPropagate_RealisticMixed mimics the 20260228_211500.WAV.data case, where
// cert=0 "Don't Know" sources sit alongside cert=100 Kiwi sources. Only the
// cert=100 Kiwi sources may be propagated onto the three targets.
func TestPropagate_RealisticMixed(t *testing.T) {
	fixture := writeFile(t,
		// Sources (kiwi-1.2)
		seg(45, 52.5, lbl(fFrom, "Don't Know", "", 0)),
		seg(142.5, 177.5, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(195, 217.5, lbl(fFrom, "Don't Know", "", 0)),
		seg(647.5, 682.5, lbl(fFrom, "Kiwi", "Female", 100)),
		seg(815, 855, lbl(fFrom, "Kiwi", "Duet", 100)),
		// Targets (kiwi-1.5)
		seg(147.5, 167.5, lbl(fTo, "Kiwi", "Male", 70)),
		seg(647.5, 672.5, lbl(fTo, "Kiwi", "Female", 70)),
		seg(815, 852.5, lbl(fTo, "Kiwi", "Duet", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File:       fixture,
		FromFilter: fFrom,
		ToFilter:   fTo,
		Species:    "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	countsOK := out.TargetsExamined == 3 && out.Propagated == 3 && out.SkippedConflict == 0
	if !countsOK {
		t.Fatalf("counts wrong: %+v", out)
	}

	// Each target keeps its time range and takes the call type of its source.
	type wanted struct {
		start, end float64
		calltype   string
	}
	reread := readFile(t, fixture)
	for _, w := range []wanted{
		{147.5, 167.5, "Male"},
		{647.5, 672.5, "Female"},
		{815, 852.5, "Duet"},
	} {
		got := findLabel(reread, fTo, w.start, w.end)
		matches := got != nil && got.Certainty == 90 && got.CallType == w.calltype && got.Species == "Kiwi"
		if !matches {
			t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", w.start, w.end, got, w.calltype)
		}
	}
}

// TestPropagate_NoWriteIfNothingChanged verifies that a file holding only
// source segments is left alone: nothing is examined or propagated, and the
// reviewer recorded in the file keeps its fixture value.
func TestPropagate_NoWriteIfNothingChanged(t *testing.T) {
	fixture := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File:       fixture,
		FromFilter: fFrom,
		ToFilter:   fTo,
		Species:    "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if idle := out.Propagated == 0 && out.TargetsExamined == 0; !idle {
		t.Fatalf("expected no activity: %+v", out)
	}

	if reviewer := readFile(t, fixture).Meta.Reviewer; reviewer != "David" {
		t.Errorf("reviewer should not be touched, got %q", reviewer)
	}
}

// writeFileAt writes a fixture .data file named base inside the existing
// directory dir and returns its full path. The fixture carries a fixed meta
// block (operator "ML", reviewer "David", duration 3600) plus the given
// segments. The test fails immediately if the file cannot be written.
func writeFileAt(t *testing.T, dir, base string, segs ...*utils.Segment) string {
	t.Helper()

	meta := &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600}
	fixture := &utils.DataFile{Meta: meta, Segments: segs}

	target := filepath.Join(dir, base)
	if writeErr := fixture.Write(target); writeErr != nil {
		t.Fatalf("write fixture: %v", writeErr)
	}
	return target
}

// TestPropagateFolder_AggregatesAndSkipsMissing runs the folder tool over four
// fixtures (clean propagation, target-only, source-only, both filters with no
// overlap), then checks the aggregated counters and the on-disk state of the
// files.
func TestPropagateFolder_AggregatesAndSkipsMissing(t *testing.T) {
	root := t.TempDir()

	// File A: both filters present, one clean propagation.
	propagatedPath := writeFileAt(t, root, "a.wav.data",
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	// File B: only target filter — missing source, must be skipped silently.
	targetOnlyPath := writeFileAt(t, root, "b.wav.data",
		seg(200, 225, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	// File C: only source filter — missing target, must be skipped silently.
	writeFileAt(t, root, "c.wav.data",
		seg(300, 325, lbl(fFrom, "Kiwi", "Male", 100)),
	)
	// File D: both filters, but no overlap → targets examined, none propagated.
	disjointPath := writeFileAt(t, root, "d.wav.data",
		seg(400, 425, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),
	)

	got, err := CallsPropagateFolder(CallsPropagateFolderInput{
		Folder:     root,
		FromFilter: fFrom,
		ToFilter:   fTo,
		Species:    "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	want := CallsPropagateFolderOutput{
		FilesTotal:           4,
		FilesWithBothFilters: 2,
		FilesSkippedNoFilter: 2,
		FilesChanged:         1,
		FilesErrored:         0,
		TargetsExamined:      2,
		Propagated:           1,
		SkippedNoOverlap:     1,
	}
	assertPropagateStats(t, got, want)

	t.Run("file_a_propagated", func(t *testing.T) {
		data := readFile(t, propagatedPath)
		if reviewer := data.Meta.Reviewer; reviewer != "Skraak" {
			t.Errorf("reviewer: got %q, want Skraak", reviewer)
		}
		label := findLabel(data, fTo, 100, 125)
		if ok := label != nil && label.Certainty == 90 && label.CallType == "Male"; !ok {
			t.Errorf("target label: got %+v, want cert=90 calltype=Male", label)
		}
	})

	t.Run("file_b_skipped", func(t *testing.T) {
		if reviewer := readFile(t, targetOnlyPath).Meta.Reviewer; reviewer != "David" {
			t.Errorf("reviewer should not be touched, got %q", reviewer)
		}
	})

	t.Run("file_d_no_overlap", func(t *testing.T) {
		data := readFile(t, disjointPath)
		if reviewer := data.Meta.Reviewer; reviewer != "David" {
			t.Errorf("reviewer should not be touched, got %q", reviewer)
		}
		label := findLabel(data, fTo, 500, 525)
		if ok := label != nil && label.Certainty == 70; !ok {
			t.Errorf("target label should be unchanged cert=70, got %+v", label)
		}
	})
}

func TestPropagateFolder_EmptyFolder(t *testing.T) {
dir := t.TempDir()
out, err := CallsPropagateFolder(CallsPropagateFolderInput{
Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.FilesTotal != 0 || out.Propagated != 0 {
t.Errorf("expected empty result, got %+v", out)
}
}

// TestPropagateFolder_MissingRequiredFlags verifies that each empty required
// field, and the from==to combination, is rejected with an error.
func TestPropagateFolder_MissingRequiredFlags(t *testing.T) {
	tmp := t.TempDir()
	build := func(folder, from, to, species string) CallsPropagateFolderInput {
		return CallsPropagateFolderInput{Folder: folder, FromFilter: from, ToFilter: to, Species: species}
	}
	invalid := []CallsPropagateFolderInput{
		build("", fFrom, fTo, "Kiwi"),
		build(tmp, "", fTo, "Kiwi"),
		build(tmp, fFrom, "", "Kiwi"),
		build(tmp, fFrom, fTo, ""),
		build(tmp, fFrom, fFrom, "Kiwi"),
	}

	for idx := range invalid {
		_, err := CallsPropagateFolder(invalid[idx])
		if err != nil {
			continue
		}
		t.Errorf("case %d: expected error for input %+v", idx, invalid[idx])
	}
}

func TestPropagateFolder_NonexistentFolder(t *testing.T) {
_, err := CallsPropagateFolder(CallsPropagateFolderInput{
Folder: "/nonexistent/path/xyz", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err == nil {
t.Fatal("expected error for nonexistent folder")
}
}

// TestPropagateFolder_ConflictsTaggedWithFile verifies that a conflict found
// during a folder run carries the path of the file it came from.
func TestPropagateFolder_ConflictsTaggedWithFile(t *testing.T) {
	root := t.TempDir()
	// Two sources with different calltypes both overlapping one target.
	writeFileAt(t, root, "conflict.wav.data",
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(110, 130, lbl(fFrom, "Kiwi", "Female", 100)),
		seg(100, 130, lbl(fTo, "Kiwi", "", 70)),
	)

	out, err := CallsPropagateFolder(CallsPropagateFolderInput{
		Folder:     root,
		FromFilter: fFrom,
		ToFilter:   fTo,
		Species:    "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if single := out.SkippedConflict == 1 && len(out.Conflicts) == 1; !single {
		t.Fatalf("expected one conflict, got %+v", out)
	}
	if first := out.Conflicts[0]; first.File == "" {
		t.Errorf("conflict should be tagged with file path, got %+v", first)
	}
}
}

// assertPropagateStats compares the integer counters of got against want and
// reports one t.Errorf per mismatching counter, so a single run shows every
// counter that is wrong.
// NOTE(review): SkippedConflict is not in the compared list below — confirm
// whether that omission is intentional.
func assertPropagateStats(t *testing.T, got, want CallsPropagateFolderOutput) {
t.Helper()
// Each entry pairs a counter name (used in the failure message) with the
// actual and expected value.
checks := []struct {
name string
got int
want int
}{
{"FilesTotal", got.FilesTotal, want.FilesTotal},
{"FilesWithBothFilters", got.FilesWithBothFilters, want.FilesWithBothFilters},
{"FilesSkippedNoFilter", got.FilesSkippedNoFilter, want.FilesSkippedNoFilter},
{"FilesChanged", got.FilesChanged, want.FilesChanged},
{"FilesErrored", got.FilesErrored, want.FilesErrored},
{"TargetsExamined", got.TargetsExamined, want.TargetsExamined},
{"Propagated", got.Propagated, want.Propagated},
{"SkippedNoOverlap", got.SkippedNoOverlap, want.SkippedNoOverlap},
}
for _, c := range checks {
if c.got != c.want {
t.Errorf("%s: got %d, want %d", c.name, c.got, c.want)
}
}
file deletion: calls_propagate.go (----------)

[6.248737]→[6.441079:441121](∅→∅),[6.441121]→[6.430725:430725](∅→∅)

package tools

import (
"fmt"
"os"

"skraak/utils"
)

// CallsPropagateInput holds the arguments for propagating classifications
// within a single .data file. All four fields are required, and FromFilter
// must differ from ToFilter.
type CallsPropagateInput struct {
// File is the path of the .data file to parse and, if changed, rewrite.
File string `json:"file"`
// FromFilter names the filter whose labels act as sources.
FromFilter string `json:"from_filter"`
// ToFilter names the filter whose labels are candidates for updating.
ToFilter string `json:"to_filter"`
// Species is the species a source label must carry to be considered.
Species string `json:"species"`
}

// CallsPropagateOutput reports the result of one CallsPropagate run. The first
// four fields echo the input.
type CallsPropagateOutput struct {
File string `json:"file"`
FromFilter string `json:"from_filter"`
ToFilter string `json:"to_filter"`
Species string `json:"species"`
// FiltersMissing is set when the file does not contain both filters; the
// counters below stay at zero in that case.
FiltersMissing bool `json:"filters_missing,omitempty"`
// TargetsExamined counts segments that had an updatable target label.
TargetsExamined int `json:"targets_examined"`
// Propagated counts target labels that were updated.
Propagated int `json:"propagated"`
// SkippedNoOverlap counts targets with no overlapping source.
SkippedNoOverlap int `json:"skipped_no_overlap"`
// SkippedConflict counts targets whose overlapping sources disagreed on call type.
SkippedConflict int `json:"skipped_conflict"`
// Conflicts holds one record per target counted in SkippedConflict.
Conflicts []PropagateConflict `json:"conflicts,omitempty"`
// Changes holds one record per target counted in Propagated.
Changes []PropagateChange `json:"changes,omitempty"`
// Error carries the same message as the returned error, if any.
Error string `json:"error,omitempty"`
}

// CallsPropagateFolderInput holds the arguments for propagating
// classifications across every .data file in a folder. All four fields are
// required, and FromFilter must differ from ToFilter.
type CallsPropagateFolderInput struct {
// Folder is the directory searched for .data files.
Folder string `json:"folder"`
// FromFilter names the filter whose labels act as sources.
FromFilter string `json:"from_filter"`
// ToFilter names the filter whose labels are candidates for updating.
ToFilter string `json:"to_filter"`
// Species is the species a source label must carry to be considered.
Species string `json:"species"`
}

// CallsPropagateFolderOutput aggregates the per-file results of a folder run.
// The first four fields echo the input.
type CallsPropagateFolderOutput struct {
Folder string `json:"folder"`
FromFilter string `json:"from_filter"`
ToFilter string `json:"to_filter"`
Species string `json:"species"`
// FilesTotal is the number of .data files found in the folder.
FilesTotal int `json:"files_total"`
// FilesWithBothFilters counts files that were processed without error and
// contained both filters.
FilesWithBothFilters int `json:"files_with_both_filters"`
// FilesSkippedNoFilter counts files reported as missing one or both filters.
FilesSkippedNoFilter int `json:"files_skipped_no_filter"`
// FilesChanged counts files in which at least one label was propagated.
FilesChanged int `json:"files_changed"`
// FilesErrored counts files whose individual run returned an error.
FilesErrored int `json:"files_errored"`
// The next four counters are sums of the same-named per-file counters.
TargetsExamined int `json:"targets_examined"`
Propagated int `json:"propagated"`
SkippedNoOverlap int `json:"skipped_no_overlap"`
SkippedConflict int `json:"skipped_conflict"`
// Conflicts collects every per-file conflict, each tagged with its file path.
Conflicts []PropagateConflict `json:"conflicts,omitempty"`
// Errors holds the per-file output of each file counted in FilesErrored.
Errors []CallsPropagateOutput `json:"errors,omitempty"`
// Error carries the same message as the returned error, if any.
Error string `json:"error,omitempty"`
}

// PropagateConflict describes a target segment that was skipped because its
// overlapping sources did not agree on a call type.
type PropagateConflict struct {
// File is filled in by the folder run; the single-file run leaves it empty.
File string `json:"file,omitempty"`
// TargetStart and TargetEnd are the target segment's start and end times.
TargetStart float64 `json:"target_start"`
TargetEnd float64 `json:"target_end"`
// TargetCallType is the call type the target label had when it was skipped.
TargetCallType string `json:"target_calltype,omitempty"`
// SourceChoices lists every overlapping source that took part in the conflict.
SourceChoices []PropagateSourceChoice `json:"source_choices"`
}

// PropagateSourceChoice is one overlapping source listed in a conflict record:
// the source segment's time range plus the species and call type of its label.
type PropagateSourceChoice struct {
Start float64 `json:"start"`
End float64 `json:"end"`
Species string `json:"species"`
CallType string `json:"calltype,omitempty"`
}

// PropagateChange records one propagated target: the target segment's time
// range, the label values before the change (Prev*), and the values being
// applied (New*).
type PropagateChange struct {
TargetStart float64 `json:"target_start"`
TargetEnd float64 `json:"target_end"`
PrevSpecies string `json:"prev_species"`
PrevCallType string `json:"prev_calltype,omitempty"`
PrevCertainty int `json:"prev_certainty"`
NewSpecies string `json:"new_species"`
NewCallType string `json:"new_calltype,omitempty"`
NewCertainty int `json:"new_certainty"`
}

// CallsPropagate copies verified classifications (certainty==100) from one filter's
// segments to overlapping target segments of another filter, within a single .data file.
// Target labels with certainty==70 (ML-unverified) or certainty==0 (Don't Know / Noise)
// are updated — targets at certainty==100 (human-verified) and certainty==90 (already
// propagated) are left alone. Only source labels matching --species are considered.
// Propagated target labels are set to certainty=90 and file reviewer is set to "Skraak".
//
// The returned output always echoes the input fields. On failure the message is
// stored in output.Error and also returned as the error; parse and write
// failures wrap the underlying error so callers can inspect it with
// errors.Is / errors.As. A file that lacks either filter is not an error: it
// returns with FiltersMissing set. The file is rewritten only when at least one
// label was propagated.
func CallsPropagate(input CallsPropagateInput) (CallsPropagateOutput, error) {
	output := CallsPropagateOutput{
		File:       input.File,
		FromFilter: input.FromFilter,
		ToFilter:   input.ToFilter,
		Species:    input.Species,
	}

	if err := validatePropagateInput(&output, input); err != nil {
		return output, err
	}

	df, err := utils.ParseDataFile(input.File)
	if err != nil {
		// %w keeps the cause in the chain; the message text matches output.Error.
		wrapped := fmt.Errorf("parse %s: %w", input.File, err)
		output.Error = wrapped.Error()
		return output, wrapped
	}

	// Fast path: skip files that don't contain both filters at all.
	if !hasBothFilters(df, input.FromFilter, input.ToFilter) {
		output.FiltersMissing = true
		return output, nil
	}

	sources := collectPropagateSources(df, input.FromFilter, input.Species)
	propagateTargets(df, sources, input, &output)

	// Nothing changed: leave the file (and its reviewer) untouched.
	if output.Propagated == 0 {
		return output, nil
	}

	df.Meta.Reviewer = "Skraak"
	if err := df.Write(input.File); err != nil {
		wrapped := fmt.Errorf("write %s: %w", input.File, err)
		output.Error = wrapped.Error()
		return output, wrapped
	}
	return output, nil
}

// validatePropagateInput checks, in order, that File, FromFilter, ToFilter and
// Species are non-empty, that the two filters differ, and that input.File
// exists. On the first failed check it stores the message in output.Error and
// returns an error carrying the same message.
// NOTE(review): only a not-exist result from os.Stat is rejected here; any
// other stat error passes this check.
func validatePropagateInput(output *CallsPropagateOutput, input CallsPropagateInput) error {
// Each required value is paired with the message reported when it is empty.
checks := []struct {
val string
msg string
}{
{input.File, "--file is required"},
{input.FromFilter, "--from is required"},
{input.ToFilter, "--to is required"},
{input.Species, "--species is required"},
}
for _, c := range checks {
if c.val == "" {
output.Error = c.msg
return fmt.Errorf("%s", c.msg)
}
}
if input.FromFilter == input.ToFilter {
output.Error = "--from and --to must differ"
return fmt.Errorf("%s", output.Error)
}
if _, err := os.Stat(input.File); os.IsNotExist(err) {
output.Error = fmt.Sprintf("file not found: %s", input.File)
return fmt.Errorf("%s", output.Error)
}

// hasBothFilters scans every label of every segment in df and reports true as
// soon as a label with fromFilter and a label with toFilter have both been
// seen. The two labels may sit in different segments.
func hasBothFilters(df *utils.DataFile, fromFilter, toFilter string) bool {
hasFrom, hasTo := false, false
for _, seg := range df.Segments {
for _, lbl := range seg.Labels {
if lbl.Filter == fromFilter {
hasFrom = true
}
if lbl.Filter == toFilter {
hasTo = true
}
// Stop scanning once both filters have been found.
if hasFrom && hasTo {
return true
}
}
}

// sourceRef pairs a segment with the label on it that qualified as a
// propagation source.
type sourceRef struct {
seg *utils.Segment // segment that carries the label
label *utils.Label // the qualifying source label on seg
}

// collectPropagateSources gathers source labels from df: a label qualifies when
// its filter equals fromFilter, its species equals species, and its certainty
// is exactly 100. At most one label is taken per segment.
func collectPropagateSources(df *utils.DataFile, fromFilter, species string) []sourceRef {
var sources []sourceRef
for _, seg := range df.Segments {
for _, lbl := range seg.Labels {
if lbl.Filter == fromFilter && lbl.Species == species && lbl.Certainty == 100 {
sources = append(sources, sourceRef{seg: seg, label: lbl})
// The first qualifying label wins; move on to the next segment.
break
}
}
}

// propagateTargets walks every segment of df and, for each one that has an
// updatable label under input.ToFilter, tries to copy a classification from
// the overlapping sources. It updates the counters on output as it goes:
// targets with no overlapping source count as SkippedNoOverlap, targets whose
// sources disagree on call type count as SkippedConflict (with a conflict
// record appended), and the rest are handed to applyPropagation.
func propagateTargets(df *utils.DataFile, sources []sourceRef, input CallsPropagateInput, output *CallsPropagateOutput) {
	for _, target := range df.Segments {
		label := findUpdatableTargetLabel(target.Labels, input.ToFilter)
		if label == nil {
			continue
		}
		output.TargetsExamined++

		candidates := findOverlappingSources(sources, target)
		if len(candidates) == 0 {
			output.SkippedNoOverlap++
			continue
		}

		if callType, disagree := resolveCallType(candidates); disagree {
			output.SkippedConflict++
			record := buildConflictRecord(target, label, candidates)
			output.Conflicts = append(output.Conflicts, record)
		} else {
			applyPropagation(label, input.Species, callType, target, output)
		}
	}
}

// findUpdatableTargetLabel returns the first label in labels whose filter is
// toFilter and whose certainty is 70 or 0, or nil when there is none.
func findUpdatableTargetLabel(labels []*utils.Label, toFilter string) *utils.Label {
	for i := range labels {
		candidate := labels[i]
		if candidate.Filter != toFilter {
			continue
		}
		switch candidate.Certainty {
		case 0, 70:
			return candidate
		}
	}
	return nil
}

// findOverlappingSources collects the sources whose segment time range
// overlaps the time range of tSeg.
func findOverlappingSources(sources []sourceRef, tSeg *utils.Segment) []sourceRef {
var overlaps []sourceRef
for _, s := range sources {
// Strict inequalities: ranges that only touch at an endpoint do not overlap.
if s.seg.StartTime < tSeg.EndTime && tSeg.StartTime < s.seg.EndTime {
overlaps = append(overlaps, s)
}

// resolveCallType checks whether all overlapping sources agree on a call type.
// It returns the agreed call type and false when they do, or "" and true as
// soon as one source differs from the first. An empty overlaps slice has
// nothing to disagree about and yields "" and false instead of panicking.
func resolveCallType(overlaps []sourceRef) (string, bool) {
	if len(overlaps) == 0 {
		return "", false
	}
	agreedCallType := overlaps[0].label.CallType
	for _, s := range overlaps[1:] {
		if s.label.CallType != agreedCallType {
			return "", true
		}
	}
	return agreedCallType, false
}

// buildConflictRecord builds a PropagateConflict for target segment tSeg: it
// records the target's time range and current call type, plus one
// PropagateSourceChoice per overlapping source. The File field is left empty.
func buildConflictRecord(tSeg *utils.Segment, toLabel *utils.Label, overlaps []sourceRef) PropagateConflict {
// Capacity is known up front: one choice per overlapping source.
choices := make([]PropagateSourceChoice, 0, len(overlaps))
for _, s := range overlaps {
choices = append(choices, PropagateSourceChoice{
Start: s.seg.StartTime,
End: s.seg.EndTime,
Species: s.label.Species,
CallType: s.label.CallType,
})
}
return PropagateConflict{
TargetStart: tSeg.StartTime,
TargetEnd: tSeg.EndTime,
TargetCallType: toLabel.CallType,
SourceChoices: choices,
}

// applyPropagation overwrites the target label with the propagated
// classification (species, callType, certainty 90) and records the change on
// output: Propagated is incremented and a PropagateChange holding the previous
// and new values is appended to Changes.
func applyPropagation(toLabel *utils.Label, species, callType string, tSeg *utils.Segment, output *CallsPropagateOutput) {
	// Certainty assigned to every propagated label.
	const propagatedCertainty = 90

	// Capture the previous values before the label is mutated.
	change := PropagateChange{
		TargetStart:   tSeg.StartTime,
		TargetEnd:     tSeg.EndTime,
		PrevSpecies:   toLabel.Species,
		PrevCallType:  toLabel.CallType,
		PrevCertainty: toLabel.Certainty,
		NewSpecies:    species,
		NewCallType:   callType,
		NewCertainty:  propagatedCertainty,
	}

	// Without these assignments the change is reported but never reaches the
	// label that gets written back to the file.
	toLabel.Species = species
	toLabel.CallType = callType
	toLabel.Certainty = propagatedCertainty

	output.Propagated++
	output.Changes = append(output.Changes, change)
}

// CallsPropagateFolder runs CallsPropagate on every .data file found in
// input.Folder and sums the per-file counters into one result.
//
// Files lacking either the --from or the --to filter are skipped silently and
// counted in FilesSkippedNoFilter. A file whose individual run fails is counted
// in FilesErrored and its per-file output is kept in Errors; the run continues
// with the next file. Only invalid input, an unusable folder, or a failure to
// list the folder's .data files makes the function itself return an error, and
// that message is also stored in the output's Error field.
func CallsPropagateFolder(input CallsPropagateFolderInput) (CallsPropagateFolderOutput, error) {
	output := CallsPropagateFolderOutput{
		Folder:     input.Folder,
		FromFilter: input.FromFilter,
		ToFilter:   input.ToFilter,
		Species:    input.Species,
	}

	// fail stores msg on the output and returns it as the error as well.
	fail := func(msg string) (CallsPropagateFolderOutput, error) {
		output.Error = msg
		return output, fmt.Errorf("%s", output.Error)
	}

	// Required-field checks; cases are evaluated top to bottom.
	switch {
	case input.Folder == "":
		return fail("--folder is required")
	case input.FromFilter == "":
		return fail("--from is required")
	case input.ToFilter == "":
		return fail("--to is required")
	case input.Species == "":
		return fail("--species is required")
	case input.FromFilter == input.ToFilter:
		return fail("--from and --to must differ")
	}

	info, err := os.Stat(input.Folder)
	if err != nil {
		return fail(fmt.Sprintf("folder not found: %s", input.Folder))
	}
	if !info.IsDir() {
		return fail(fmt.Sprintf("not a directory: %s", input.Folder))
	}

	files, err := utils.FindDataFiles(input.Folder)
	if err != nil {
		return fail(fmt.Sprintf("list .data files: %v", err))
	}
	output.FilesTotal = len(files)

	for _, path := range files {
		result, runErr := CallsPropagate(CallsPropagateInput{
			File:       path,
			FromFilter: input.FromFilter,
			ToFilter:   input.ToFilter,
			Species:    input.Species,
		})
		switch {
		case runErr != nil:
			output.FilesErrored++
			output.Errors = append(output.Errors, result)
			continue
		case result.FiltersMissing:
			output.FilesSkippedNoFilter++
			continue
		}

		output.FilesWithBothFilters++
		output.TargetsExamined += result.TargetsExamined
		output.Propagated += result.Propagated
		output.SkippedNoOverlap += result.SkippedNoOverlap
		output.SkippedConflict += result.SkippedConflict
		if result.Propagated > 0 {
			output.FilesChanged++
		}

		// Tag each conflict with its file; conflict is a copy, so the per-file
		// result is not modified.
		for _, conflict := range result.Conflicts {
			conflict.File = path
			output.Conflicts = append(output.Conflicts, conflict)
		}
	}

	return output, nil
}

toLabel.Species = species
toLabel.CallType = callType
toLabel.Certainty = 90
}
}
return overlaps
}
return sources
}
return false
}
return nil
}
file deletion: calls_modify_test.go (----------)

[6.248737]→[6.450654:450698](∅→∅),[6.450698]→[6.441123:441123](∅→∅)

package tools

import (
"path/filepath"
"testing"

"skraak/utils"
)

// TestCallsModifyBookmark verifies that requesting bookmark=true on a label
// that is already bookmarked is reported as "no changes needed" and leaves the
// stored bookmark set.
func TestCallsModifyBookmark(t *testing.T) {
	path := filepath.Join(t.TempDir(), "test.data")

	// Fixture: one segment whose label is already bookmarked.
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: true}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	fixture := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := fixture.Write(path); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Request the value the label already has.
	wantBookmark := true
	got, modErr := CallsModify(CallsModifyInput{
		File:      path,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 80,
		Bookmark:  &wantBookmark,
	})
	if modErr == nil {
		t.Errorf("expected error 'no changes needed' when bookmark already true, got nil")
	}
	if got.Error != "No changes needed: all values already match" {
		t.Errorf("expected 'no changes needed' error, got: %s", got.Error)
	}

	// The file on disk must still carry the bookmark.
	reread, err := utils.ParseDataFile(path)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if stillSet := reread.Segments[0].Labels[0].Bookmark; !stillSet {
		t.Errorf("bookmark should still be true, got false")
	}
}

// TestCallsModifyBookmarkFalse verifies that requesting bookmark=true on a
// label without a bookmark sets it, both in the returned output and on disk.
func TestCallsModifyBookmarkFalse(t *testing.T) {
	path := filepath.Join(t.TempDir(), "test.data")

	// Fixture: one segment whose label is not bookmarked.
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: false}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	fixture := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := fixture.Write(path); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	wantBookmark := true
	got, modErr := CallsModify(CallsModifyInput{
		File:      path,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 80,
		Bookmark:  &wantBookmark,
	})
	if modErr != nil {
		t.Errorf("unexpected error: %v", modErr)
	}
	if reported := got.Bookmark != nil && *got.Bookmark; !reported {
		t.Errorf("expected bookmark=true in result, got %v", got.Bookmark)
	}

	// The change must have been persisted.
	reread, err := utils.ParseDataFile(path)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if persisted := reread.Segments[0].Labels[0].Bookmark; !persisted {
		t.Errorf("bookmark should be true, got false")
	}
}

// TestCallsModifyCommentAdditive verifies that a new comment is appended to an
// existing one with " | " as separator, in the output and in the file.
func TestCallsModifyCommentAdditive(t *testing.T) {
	path := filepath.Join(t.TempDir(), "test.data")

	// Fixture: the label already carries a comment.
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: "First observation"}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	fixture := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := fixture.Write(path); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	got, modErr := CallsModify(CallsModifyInput{
		File:      path,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 80,
		Comment:   "Good example",
	})
	if modErr != nil {
		t.Errorf("unexpected error: %v", modErr)
	}

	const combined = "First observation | Good example"
	if got.Comment != combined {
		t.Errorf("expected comment=%q, got %q", combined, got.Comment)
	}

	// The combined comment must also be what was stored.
	reread, err := utils.ParseDataFile(path)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if stored := reread.Segments[0].Labels[0].Comment; stored != combined {
		t.Errorf("expected comment in file=%q, got %q", combined, stored)
	}
}

// TestCallsModifyCommentAdditiveMultiple adds three comments in a row to a
// label that starts without one and verifies they accumulate in order, joined
// by " | ".
func TestCallsModifyCommentAdditiveMultiple(t *testing.T) {
	path := filepath.Join(t.TempDir(), "test.data")

	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter"}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	fixture := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := fixture.Write(path); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// addComment issues one modify call that differs only in its comment text.
	addComment := func(text string) (CallsModifyOutput, error) {
		return CallsModify(CallsModifyInput{
			File:      path,
			Reviewer:  "tester",
			Filter:    "myfilter",
			Segment:   "10-15",
			Certainty: 80,
			Comment:   text,
		})
	}

	if _, err := addComment("First"); err != nil {
		t.Fatalf("unexpected error on first comment: %v", err)
	}
	if _, err := addComment("Second"); err != nil {
		t.Fatalf("unexpected error on second comment: %v", err)
	}
	last, err := addComment("Third")
	if err != nil {
		t.Fatalf("unexpected error on third comment: %v", err)
	}

	const combined = "First | Second | Third"
	if last.Comment != combined {
		t.Errorf("expected comment=%q, got %q", combined, last.Comment)
	}
}

// TestCallsModifyCommentTooLong verifies that a new comment is rejected when
// joining it to the existing comment would exceed 140 characters, and that the
// existing comment in the file is left as it was.
func TestCallsModifyCommentTooLong(t *testing.T) {
	path := filepath.Join(t.TempDir(), "test.data")

	const original = "This is a fairly long existing comment that takes up space"
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: original}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	fixture := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := fixture.Write(path); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Within the limit by itself; too long once joined to the original.
	const addition = "This is another very long comment that when combined with the existing one will exceed the limit"
	got, modErr := CallsModify(CallsModifyInput{
		File:      path,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 80,
		Comment:   addition,
	})
	if modErr == nil {
		t.Errorf("expected error for combined comment exceeding 140 chars, got nil")
	}
	if got.Error == "" {
		t.Errorf("expected error message, got empty")
	}

	// The rejected call must not have altered the stored comment.
	reread, err := utils.ParseDataFile(path)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if stored := reread.Segments[0].Labels[0].Comment; stored != original {
		t.Errorf("original comment should be preserved, got %q", stored)
	}
}

// TestCallsModifyPreservesBookmarkOnOtherChange verifies that changing the
// certainty without passing a bookmark leaves an existing bookmark in place and
// does not report a bookmark in the output.
func TestCallsModifyPreservesBookmarkOnOtherChange(t *testing.T) {
	path := filepath.Join(t.TempDir(), "test.data")

	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Bookmark: true}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	fixture := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := fixture.Write(path); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Only the certainty changes; Bookmark is deliberately left nil.
	got, modErr := CallsModify(CallsModifyInput{
		File:      path,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 100,
	})
	if modErr != nil {
		t.Errorf("unexpected error: %v", modErr)
	}
	if got.Bookmark != nil {
		t.Errorf("bookmark should not be in output when not changed, got %v", got.Bookmark)
	}

	reread, err := utils.ParseDataFile(path)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if kept := reread.Segments[0].Labels[0].Bookmark; !kept {
		t.Errorf("bookmark should still be true after changing certainty, got false")
	}
}

// TestCallsModifyInvalidSegment verifies that addressing a time range with no
// matching segment yields an error and a non-empty error message in the output.
func TestCallsModifyInvalidSegment(t *testing.T) {
	path := filepath.Join(t.TempDir(), "test.data")

	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter"}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	fixture := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := fixture.Write(path); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// The fixture only has a 10-15 segment, so 99-100 cannot match.
	got, modErr := CallsModify(CallsModifyInput{
		File:      path,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "99-100",
		Certainty: 80,
	})
	if modErr == nil {
		t.Errorf("expected error for non-existent segment, got nil")
	}
	if got.Error == "" {
		t.Errorf("expected error message, got empty")
	}
}
file deletion: calls_modify.go (----------)

[6.248737]→[6.458192:458231](∅→∅),[6.458231]→[6.450700:450700](∅→∅)

package tools

import (
"fmt"
"math"
"os"
"strings"

"skraak/utils"
)

// CallsModifyInput defines the input for the modify tool.
// File, Reviewer, Filter and Segment are required.
type CallsModifyInput struct {
// File is the path of the .data file to modify.
File string `json:"file"`
// Reviewer is written to the file's meta reviewer when changes are applied.
Reviewer string `json:"reviewer"`
// Filter selects which label of the segment is modified.
Filter string `json:"filter"`
// Segment is a "start-end" time range such as "12-15".
Segment string `json:"segment"`
// Certainty is the certainty to store on the label.
Certainty int `json:"certainty"`
// Species is "Species" or "Species+CallType"; empty keeps the label's
// current species and call type.
Species string `json:"species"`
// Bookmark is optional: nil leaves the label's bookmark unchanged.
Bookmark *bool `json:"bookmark"`
// Comment is appended to any existing comment; it must be ASCII and at
// most 140 characters.
Comment string `json:"comment"`
}

// CallsModifyOutput defines the output for the modify tool.
type CallsModifyOutput struct {
// File echoes the input file path.
File string `json:"file"`
// SegmentStart and SegmentEnd are the parsed bounds of the requested segment.
SegmentStart int `json:"segment_start"`
SegmentEnd int `json:"segment_end"`
// Species, CallType and Certainty are the values applied to the label.
Species string `json:"species,omitempty"`
CallType string `json:"calltype,omitempty"`
Certainty int `json:"certainty,omitempty"`
// Bookmark is set only when the bookmark value actually changed.
Bookmark *bool `json:"bookmark,omitempty"`
Comment string `json:"comment,omitempty"`
// PreviousValue is the formatted label as it was before modification.
PreviousValue string `json:"previous_value,omitempty"`
// Error carries the same message as the returned error, if any.
Error string `json:"error,omitempty"`
}

// validateModifyInput reports the first problem found in input, or nil.
// It checks, in order, that --file, --reviewer, --filter and --segment are
// non-empty, that the comment is at most 140 bytes long, and that the comment
// contains no rune above 127. The position in the ASCII error is the byte
// offset of the offending rune.
func validateModifyInput(input CallsModifyInput) error {
	required := [...]struct{ flag, value string }{
		{"--file", input.File},
		{"--reviewer", input.Reviewer},
		{"--filter", input.Filter},
		{"--segment", input.Segment},
	}
	for _, field := range required {
		if field.value == "" {
			return fmt.Errorf("%s is required", field.flag)
		}
	}

	const maxCommentLen = 140
	if len(input.Comment) > maxCommentLen {
		return fmt.Errorf("--comment must be 140 characters or less")
	}

	const maxASCII = 127
	for offset, r := range input.Comment {
		if r > maxASCII {
			return fmt.Errorf("--comment must be ASCII only (non-ASCII at position %d)", offset)
		}
	}
	return nil
}

// resolveSpecies determines the species and call type to apply to a label.
// An empty inputSpecies keeps the label's current values. Otherwise the input
// is split at its first "+": the part before is the species and the part after
// is the call type. Without a "+", the whole input is the species and the call
// type is empty.
func resolveSpecies(inputSpecies string, label *utils.Label) (species, callType string) {
	if inputSpecies == "" {
		return label.Species, label.CallType
	}
	// When no "+" is present, Cut returns the whole input and an empty remainder.
	species, callType, _ = strings.Cut(inputSpecies, "+")
	return species, callType
}

// hasModifyChanges reports whether applying the request would alter the label:
// a different species or call type, a different certainty, an explicitly
// requested bookmark value that differs from the stored one, or any non-empty
// comment.
func hasModifyChanges(newSpecies, newCallType string, input CallsModifyInput, label *utils.Label) bool {
	switch {
	case newSpecies != label.Species, newCallType != label.CallType:
		return true
	case input.Certainty != label.Certainty:
		return true
	case input.Bookmark != nil && *input.Bookmark != label.Bookmark:
		return true
	default:
		return input.Comment != ""
	}
}

// applyLabelChanges writes the requested values onto label and mirrors them in
// output: it sets the data file's reviewer, the label's species, call type and
// certainty, and — only when a bookmark was passed and differs from the stored
// one — the bookmark. A non-empty input comment is joined to any existing
// comment with " | "; a combined comment longer than 140 bytes is an error.
// NOTE(review): in the lines visible here the combined comment is built and
// length-checked but not stored — confirm against the complete function.
func applyLabelChanges(label *utils.Label, dataFile *utils.DataFile, input CallsModifyInput, newSpecies, newCallType string, output *CallsModifyOutput) error {
dataFile.Meta.Reviewer = input.Reviewer

label.Species = newSpecies
label.CallType = newCallType
output.Species = newSpecies
output.CallType = newCallType

label.Certainty = input.Certainty
output.Certainty = input.Certainty

// output.Bookmark stays nil unless the bookmark value actually changes.
if input.Bookmark != nil && *input.Bookmark != label.Bookmark {
label.Bookmark = *input.Bookmark
output.Bookmark = input.Bookmark
}

if input.Comment != "" {
// Comments are additive: the new text follows the existing one.
var newComment string
if label.Comment != "" {
newComment = label.Comment + " | " + input.Comment
} else {
newComment = input.Comment
}
if len(newComment) > 140 {
return fmt.Errorf("combined comment exceeds 140 characters (%d)", len(newComment))
}
}

output.File = input.File
output.SegmentStart = startTime
output.SegmentEnd = endTime

if _, err := os.Stat(input.File); os.IsNotExist(err) {
output.Error = fmt.Sprintf("File not found: %s", input.File)
return output, fmt.Errorf("%s", output.Error)
}

dataFile, err := utils.ParseDataFile(input.File)
if err != nil {
output.Error = fmt.Sprintf("Failed to parse file: %v", err)
return output, fmt.Errorf("%s", output.Error)
}

segment := findSegment(dataFile.Segments, startTime, endTime, input.Filter)
if segment == nil {
output.Error = fmt.Sprintf("No segment found matching time range %d-%d", startTime, endTime)
return output, fmt.Errorf("%s", output.Error)
}

if targetLabel == nil {
output.Error = fmt.Sprintf("No label found with filter '%s' in segment %d-%d", input.Filter, startTime, endTime)
return output, fmt.Errorf("%s", output.Error)
}

output.PreviousValue = formatLabel(targetLabel)

newSpecies, newCallType := resolveSpecies(input.Species, targetLabel)

if !hasModifyChanges(newSpecies, newCallType, input, targetLabel) {
output.Error = "No changes needed: all values already match"
return output, fmt.Errorf("%s", output.Error)
}

if err := applyLabelChanges(targetLabel, dataFile, input, newSpecies, newCallType, &output); err != nil {
output.Error = err.Error()
return output, err
}

if err := dataFile.Write(input.File); err != nil {
output.Error = fmt.Sprintf("Failed to save file: %v", err)
return output, fmt.Errorf("%s", output.Error)
}

return output, nil
}

// parseSegmentRange parses a "start-end" segment specifier such as "12-15"
// into its two integer bounds.
//
// Both parts must consist solely of decimal digits, so signs, whitespace,
// fractions ("1.5") and trailing text ("12s") are rejected. An error is also
// returned when the string does not contain exactly one "-", when a value
// does not fit in an int, or when start is not strictly less than end.
func parseSegmentRange(s string) (int, int, error) {
	parts := strings.Split(s, "-")
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("invalid segment format: %s (expected start-end, e.g., 12-15)", s)
	}

	// fmt.Sscanf stops at the first character that is not part of the number
	// and ignores the rest, so on its own it would accept "12abc" or "1.5".
	// Require plain digits up front; Sscanf is still used for the conversion
	// because it reports out-of-range values.
	isDigits := func(v string) bool {
		if v == "" {
			return false
		}
		for i := 0; i < len(v); i++ {
			if v[i] < '0' || v[i] > '9' {
				return false
			}
		}
		return true
	}

	var start, end int
	if !isDigits(parts[0]) {
		return 0, 0, fmt.Errorf("invalid start time: %s", parts[0])
	}
	if _, err := fmt.Sscanf(parts[0], "%d", &start); err != nil {
		return 0, 0, fmt.Errorf("invalid start time: %s", parts[0])
	}
	if !isDigits(parts[1]) {
		return 0, 0, fmt.Errorf("invalid end time: %s", parts[1])
	}
	if _, err := fmt.Sscanf(parts[1], "%d", &end); err != nil {
		return 0, 0, fmt.Errorf("invalid end time: %s", parts[1])
	}

	// Digit-only parts can never be negative, so only the ordering remains
	// to be checked.
	if start >= end {
		return 0, 0, fmt.Errorf("start time must be less than end time")
	}

	return start, end, nil
}

// findSegment returns the first segment whose whole-second bounds equal
// startTime and endTime and which carries at least one label with the given
// filter, or nil when there is no such segment.
//
// A segment's bounds are floor(StartTime) and ceil(EndTime); when those
// coincide the end is pushed out by one second. Requiring the filter match
// lets segments with the same time range but different filters be told apart.
func findSegment(segments []*utils.Segment, startTime, endTime int, filter string) *utils.Segment {
	for _, candidate := range segments {
		lo := int(math.Floor(candidate.StartTime))
		hi := int(math.Ceil(candidate.EndTime))
		if hi == lo {
			hi = lo + 1 // minimum 1 second
		}
		if lo != startTime || hi != endTime {
			continue
		}
		for _, lbl := range candidate.Labels {
			if lbl.Filter == filter {
				return candidate
			}
		}
	}
	return nil
}

// formatLabel renders a label as "species (NN%)", or "species+calltype (NN%)"
// when the label has a call type. NN is the label's certainty.
func formatLabel(label *utils.Label) string {
	name := label.Species
	if label.CallType != "" {
		name = name + "+" + label.CallType
	}
	return name + fmt.Sprintf(" (%d%%)", label.Certainty)
}
}

// findLabelByFilter finds the first label matching the given filter in a segment.
func findLabelByFilter(segment *utils.Segment, filter string) *utils.Label {
for _, label := range segment.Labels {
if label.Filter == filter {
return label
}
}
return nil
targetLabel := findLabelByFilter(segment, input.Filter)
startTime, endTime, err := parseSegmentRange(input.Segment)
if err != nil {
output.Error = err.Error()
return output, err
}

label.Comment = newComment
output.Comment = newComment
}

return nil
}

// CallsModify modifies a label in a .data file
func CallsModify(input CallsModifyInput) (CallsModifyOutput, error) {
var output CallsModifyOutput

if err := validateModifyInput(input); err != nil {
output.Error = err.Error()
return output, err
file deletion: calls_from_raven.go (----------)

[6.248737]→[6.471700:471743](∅→∅),[6.471743]→[6.458233:458233](∅→∅)

package tools

import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"

"skraak/utils"
)

// CallsFromRavenInput defines the input for the calls-from-raven tool.
// CallsFromRaven converts this value directly to CallsFromSourceInput, so the
// two struct types have to keep the same fields.
type CallsFromRavenInput struct {
// NOTE(review): presumably the folder scanned for Raven selection files — confirm against callsFromSource.
Folder string `json:"folder"`
// NOTE(review): presumably a single selection file to process — confirm against callsFromSource.
File string `json:"file"`
// NOTE(review): presumably requests deletion of processed files (cf. FilesDeleted in the output) — confirm.
Delete bool `json:"delete"`
ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
}

// CallsFromRavenOutput defines the output for the calls-from-raven tool.
// CallsFromRaven fills every field by copying the field of the same name from
// the result of callsFromSource.
type CallsFromRavenOutput struct {
Calls []ClusteredCall `json:"calls"`
TotalCalls int `json:"total_calls"`
SpeciesCount map[string]int `json:"species_count"`
DataFilesWritten int `json:"data_files_written"`
DataFilesSkipped int `json:"data_files_skipped"`
FilesProcessed int `json:"files_processed"`
FilesDeleted int `json:"files_deleted"`
Filter string `json:"filter"`
// Error is omitted from the JSON when nil.
Error *string `json:"error,omitempty"`
}

// ravenSource implements CallSource for Raven selection files
type ravenSource struct{}

// Name returns the display name of this source, "Raven".
func (ravenSource) Name() string { return "Raven" }

// FindFiles lists the Raven selection files directly inside folder, i.e. the
// entries whose name ends in ".selections.txt". Directories are skipped even
// when their name carries that suffix, since they cannot be parsed as a
// selection table. Paths are returned joined onto folder, in the order
// os.ReadDir reports them. The error from os.ReadDir is returned unchanged.
func (ravenSource) FindFiles(folder string) ([]string, error) {
	entries, err := os.ReadDir(folder)
	if err != nil {
		return nil, err
	}

	var files []string
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		if name := entry.Name(); strings.HasSuffix(name, ".selections.txt") {
			files = append(files, filepath.Join(folder, name))
		}
	}

	return files, nil
}

// ProcessFile handles one Raven selection file by delegating to
// processRavenFileCached, passing ravenFile and cache through unchanged and
// returning its results as-is.
// NOTE(review): the two bool results look like "data file written" and
// "file skipped" respectively — confirm against processRavenFileCached.
func (ravenSource) ProcessFile(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
return processRavenFileCached(ravenFile, cache)
}

// CallsFromRaven runs the shared callsFromSource pipeline with the Raven
// source and repackages its result as a CallsFromRavenOutput. The output is
// populated from the pipeline result even when an error is returned, and the
// pipeline's error is passed through unchanged.
func CallsFromRaven(input CallsFromRavenInput) (CallsFromRavenOutput, error) {
	result, err := callsFromSource(ravenSource{}, CallsFromSourceInput(input))

	// Field-for-field copy into the Raven-specific output type.
	return CallsFromRavenOutput{
		Calls:            result.Calls,
		TotalCalls:       result.TotalCalls,
		SpeciesCount:     result.SpeciesCount,
		DataFilesWritten: result.DataFilesWritten,
		DataFilesSkipped: result.DataFilesSkipped,
		FilesProcessed:   result.FilesProcessed,
		FilesDeleted:     result.FilesDeleted,
		Filter:           result.Filter,
		Error:            result.Error,
	}, err
}

// RavenSelection represents a single Raven selection (one row of a Raven
// selection table).
type RavenSelection struct {
// StartTime and EndTime are parsed from the "Begin Time (s)" and
// "End Time (s)" columns.
StartTime float64
EndTime float64
// FreqLow and FreqHigh are parsed from the "Low Freq (Hz)" and
// "High Freq (Hz)" columns. They stay 0 when the column is absent;
// buildRavenSegments treats both being 0 as "not specified".
FreqLow float64
FreqHigh float64
// Species is the raw value of the "Species" column.
Species string
}

// ravenColumnIndices holds the column index positions for a Raven file.
// parseRavenHeader initialises every index to -1, meaning "column not
// present"; the begin-time, end-time and species columns are required, while
// the two frequency columns are optional.
type ravenColumnIndices struct {
beginTimeIdx int
endTimeIdx int
lowFreqIdx int
highFreqIdx int
speciesIdx int
}

for i, col := range header {
switch col {
case "Begin Time (s)":
idx.beginTimeIdx = i
case "End Time (s)":
idx.endTimeIdx = i
case "Low Freq (Hz)":
idx.lowFreqIdx = i
case "High Freq (Hz)":
idx.highFreqIdx = i
case "Species":
idx.speciesIdx = i
}
}
if idx.beginTimeIdx == -1 || idx.endTimeIdx == -1 || idx.speciesIdx == -1 {
return idx, fmt.Errorf("missing required columns in Raven file")
}

// parseRavenSelections reads all selection rows from a scanner and returns parsed selections
func parseRavenSelections(scanner *bufio.Scanner, idx ravenColumnIndices) ([]RavenSelection, error) {
var selections []RavenSelection
for scanner.Scan() {
line := scanner.Text()
if line == "" {
continue
}

fields := strings.Split(line, "\t")
if len(fields) <= idx.speciesIdx {
continue
}

sel, err := parseRavenRow(fields, idx)
if err != nil {
return nil, err
}
selections = append(selections, sel)
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("error reading file: %w", err)
}

// parseRavenRow parses a single tab-separated row into a RavenSelection
func parseRavenRow(fields []string, idx ravenColumnIndices) (RavenSelection, error) {
var sel RavenSelection
startTime, err := strconv.ParseFloat(fields[idx.beginTimeIdx], 64)
if err != nil {
return sel, fmt.Errorf("failed to parse begin time %q: %w", fields[idx.beginTimeIdx], err)
}
sel.StartTime = startTime

endTime, err := strconv.ParseFloat(fields[idx.endTimeIdx], 64)
if err != nil {
return sel, fmt.Errorf("failed to parse end time %q: %w", fields[idx.endTimeIdx], err)
}
sel.EndTime = endTime

if idx.lowFreqIdx >= 0 && idx.lowFreqIdx < len(fields) {
freqLow, err := strconv.ParseFloat(fields[idx.lowFreqIdx], 64)
if err != nil {
return sel, fmt.Errorf("failed to parse low freq %q: %w", fields[idx.lowFreqIdx], err)
}
sel.FreqLow = freqLow
}
if idx.highFreqIdx >= 0 && idx.highFreqIdx < len(fields) {
freqHigh, err := strconv.ParseFloat(fields[idx.highFreqIdx], 64)
if err != nil {
return sel, fmt.Errorf("failed to parse high freq %q: %w", fields[idx.highFreqIdx], err)
}
sel.FreqHigh = freqHigh
}

// deriveWAVBaseName extracts the base WAV filename from a Raven .selections.txt filename
func deriveWAVBaseName(ravenFile string) string {
base := filepath.Base(ravenFile)
nameWithoutSuffix := strings.TrimSuffix(base, ".selections.txt")
idx := strings.Index(nameWithoutSuffix, ".Table.")
if idx > 0 {
nameWithoutSuffix = nameWithoutSuffix[:idx]
}

if !scanner.Scan() {
return nil, false, false, fmt.Errorf("empty file")
}
header := strings.Split(scanner.Text(), "\t")

idx, err := parseRavenHeader(header)
if err != nil {
return nil, false, false, err
}

selections, err := parseRavenSelections(scanner, idx)
if err != nil {
return nil, false, false, err
}

if len(selections) == 0 {
return nil, false, true, nil
}
if wavPath == "" {
return nil, false, true, nil
}

sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
if err != nil {
return nil, false, true, nil
}

dataPath := wavPath + ".data"
segments := buildRavenSegments(selections, sampleRate)

meta := AviaNZMeta{Operator: "Raven", Duration: duration}
reviewer := "None"
meta.Reviewer = &reviewer

if err := writeDotDataFileSafe(dataPath, segments, "Raven", meta); err != nil {
return nil, false, false, err
}

var calls []ClusteredCall
for _, sel := range selections {
calls = append(calls, ClusteredCall{
File: wavPath,
StartTime: sel.StartTime,
EndTime: sel.EndTime,
EbirdCode: sel.Species,
Segments: 1,
})
}

return calls, true, false, nil
}

// buildRavenSegments turns each Raven selection into one AviaNZ segment with
// a single label (species from the selection, certainty 70, filter "Raven").
// The selection's frequency bounds are used as given; when both are zero the
// segment spans 0 to sampleRate instead. A nil slice is returned for empty
// input.
func buildRavenSegments(selections []RavenSelection, sampleRate int) []AviaNZSegment {
	var out []AviaNZSegment

	for i := range selections {
		s := selections[i]

		// Fall back to the full band when Raven gave no frequency range.
		lo, hi := s.FreqLow, s.FreqHigh
		if lo == 0 && hi == 0 {
			hi = float64(sampleRate)
		}

		out = append(out, AviaNZSegment{
			s.StartTime,
			s.EndTime,
			lo,
			hi,
			[]AviaNZLabel{
				{
					Species:   s.Species,
					Certainty: 70, // Default certainty for Raven (no confidence metric)
					Filter:    "Raven",
				},
			},
		})
	}

	return out
}
}

// resolveWAVPath finds the WAV file corresponding to a Raven file
func resolveWAVPath(ravenFile string, cache *DirCache) string {
baseName := deriveWAVBaseName(ravenFile)
if cache != nil {
return cache.FindWAV(baseName)
}
return findWAVFile(filepath.Dir(ravenFile), baseName)

// Find WAV file
wavPath := resolveWAVPath(ravenFile, cache)
defer func() { _ = file.Close() }()

scanner := bufio.NewScanner(file)
}
return nameWithoutSuffix
}

// processRavenFileCached processes a single Raven selection file using a DirCache for WAV lookup
func processRavenFileCached(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
file, err := os.Open(ravenFile)
if err != nil {
return nil, false, false, fmt.Errorf("failed to open file: %w", err)
sel.Species = fields[idx.speciesIdx]
return sel, nil
}
return selections, nil
}
return idx, nil
}
// parseRavenHeader finds column indices from a tab-separated header line
func parseRavenHeader(header []string) (ravenColumnIndices, error) {
idx := ravenColumnIndices{beginTimeIdx: -1, endTimeIdx: -1, lowFreqIdx: -1, highFreqIdx: -1, speciesIdx: -1}
file deletion: calls_from_preds_test.go (----------)

[6.248737]→[6.483453:483501](∅→∅),[6.483501]→[6.471745:471745](∅→∅)

package tools

import (
"os"
"path/filepath"
"testing"

"skraak/utils"
)

// TestCallsFromPreds_EmptyFilterError checks that CallsFromPreds fails, and
// reports a message in the output, when no filter is given and the CSV is
// simply named "preds.csv".
func TestCallsFromPreds_EmptyFilterError(t *testing.T) {
	dir := t.TempDir()

	// Predictions CSV with a single positive kiwi detection.
	predsFile := filepath.Join(dir, "preds.csv")
	const rows = "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(predsFile, []byte(rows), 0644); err != nil {
		t.Fatal(err)
	}

	// The WAV file the CSV row refers to.
	createMinimalWAV(t, filepath.Join(dir, "test.wav"), 44100, 10.0)

	result, err := CallsFromPreds(CallsFromPredsInput{
		CSVPath:         predsFile,
		Filter:          "",
		WriteDotData:    true,
		ProgressHandler: nil,
	})

	if err == nil {
		t.Error("expected error for empty filter, got nil")
	}
	if result.Error == nil || *result.Error == "" {
		t.Error("expected error message in output, got empty")
	}
}

// TestCallsFromPreds_NewDataFile checks that, with no pre-existing .data
// file, CallsFromPreds takes the filter from the CSV filename and writes a
// .data file holding one segment with one label carrying that filter.
func TestCallsFromPreds_NewDataFile(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_test-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Test with filter parsed from filename
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename
		WriteDotData:    true,
		ProgressHandler: nil,
	}

	output, err := CallsFromPreds(input)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "test-filter" {
		t.Errorf("expected filter 'test-filter', got '%s'", output.Filter)
	}

	// Verify .data file was created
	dataPath := wavPath + ".data"
	if _, err := os.Stat(dataPath); os.IsNotExist(err) {
		t.Error("expected .data file to be created")
	}

	// Verify content
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	// The count checks are fatal: the indexing that follows would panic on
	// an unexpected count instead of reporting a clean failure.
	if len(df.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df.Segments))
	}
	if len(df.Segments[0].Labels) != 1 {
		t.Fatalf("expected 1 label, got %d", len(df.Segments[0].Labels))
	}
	if df.Segments[0].Labels[0].Filter != "test-filter" {
		t.Errorf("expected filter 'test-filter', got '%s'", df.Segments[0].Labels[0].Filter)
	}
}

// TestCallsFromPreds_ExistingDataFileSameFilter checks that CallsFromPreds
// returns an error, and leaves the existing .data file as it was, when that
// file already holds a label with the same filter as the incoming predictions.
func TestCallsFromPreds_ExistingDataFileSameFilter(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_existing-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Create existing .data file with same filter
	dataPath := wavPath + ".data"
	existingData := `[
{"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},
[5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "existing-filter"}]]
]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}

	// Test with same filter (should error)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename -> "existing-filter"
		WriteDotData:    true,
		ProgressHandler: nil,
	}

	output, err := CallsFromPreds(input)

	// Should return error
	if err == nil {
		t.Error("expected error for same filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}

	// Verify original .data file is unchanged
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	// Fatal checks: indexing below would panic rather than fail cleanly if
	// the file no longer has the expected shape.
	if len(df.Segments) != 1 {
		t.Fatalf("expected original 1 segment, got %d", len(df.Segments))
	}
	if len(df.Segments[0].Labels) == 0 {
		t.Fatal("expected original segment to keep its label")
	}
	if df.Segments[0].Labels[0].Species != "morepork" {
		t.Errorf("expected original species 'morepork', got '%s'", df.Segments[0].Labels[0].Species)
	}
}

// TestCallsFromPreds_ExistingDataFileDifferentFilter checks that predictions
// with a new filter are merged into an existing .data file: both segments are
// present afterwards, ordered by start time, and both filters appear.
func TestCallsFromPreds_ExistingDataFileDifferentFilter(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_new-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Create existing .data file with different filter
	dataPath := wavPath + ".data"
	existingData := `[
{"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},
[5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "old-filter"}]]
]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}

	// Test with different filter (should merge)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename -> "new-filter"
		WriteDotData:    true,
		ProgressHandler: nil,
	}

	output, err := CallsFromPreds(input)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}

	// Verify .data file has merged content
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	// Fatal: the ordering check below indexes both segments and would panic
	// if fewer than two were present.
	if len(df.Segments) != 2 {
		t.Fatalf("expected 2 segments after merge, got %d", len(df.Segments))
	}

	// Check segments are sorted by start time
	if df.Segments[0].StartTime > df.Segments[1].StartTime {
		t.Error("expected segments to be sorted by start time")
	}

	// Check both filters are present
	filters := make(map[string]bool)
	for _, seg := range df.Segments {
		for _, label := range seg.Labels {
			filters[label.Filter] = true
		}
	}
	if !filters["old-filter"] {
		t.Error("expected 'old-filter' to be present")
	}
	if !filters["new-filter"] {
		t.Error("expected 'new-filter' to be present")
	}
}

// TestCallsFromPreds_ExistingDataFileParseError checks that an existing
// .data file that is not valid JSON makes CallsFromPreds fail with a message
// in the output, and that the corrupted file is left byte-for-byte untouched.
func TestCallsFromPreds_ExistingDataFileParseError(t *testing.T) {
	dir := t.TempDir()

	// Predictions CSV with a parsable filter in its name.
	predsFile := filepath.Join(dir, "predsST_test-filter_2025-01-01.csv")
	const rows = "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(predsFile, []byte(rows), 0644); err != nil {
		t.Fatal(err)
	}

	// The WAV file the CSV row refers to.
	wavFile := filepath.Join(dir, "test.wav")
	createMinimalWAV(t, wavFile, 44100, 10.0)

	// A .data file that cannot be parsed.
	const garbage = `this is not valid json`
	dataFile := wavFile + ".data"
	if err := os.WriteFile(dataFile, []byte(garbage), 0644); err != nil {
		t.Fatal(err)
	}

	result, err := CallsFromPreds(CallsFromPredsInput{
		CSVPath:         predsFile,
		Filter:          "",
		WriteDotData:    true,
		ProgressHandler: nil,
	})

	if err == nil {
		t.Error("expected error for corrupted .data file, got nil")
	}
	if result.Error == nil {
		t.Error("expected error message in output")
	}

	// The broken file must not have been rewritten.
	onDisk, err := os.ReadFile(dataFile)
	if err != nil {
		t.Fatal(err)
	}
	if string(onDisk) != garbage {
		t.Error("expected corrupted file to remain unchanged")
	}
}

// TestCallsFromPreds_ExplicitFilter checks that a filter passed in the input
// is reported in the output and written into the label of the generated
// .data file, for a CSV named "predictions.csv".
func TestCallsFromPreds_ExplicitFilter(t *testing.T) {
	// Create a temp CSV file with non-standard name
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predictions.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Test with explicit filter
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "my-custom-filter",
		WriteDotData:    true,
		ProgressHandler: nil,
	}

	output, err := CallsFromPreds(input)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.Filter != "my-custom-filter" {
		t.Errorf("expected filter 'my-custom-filter', got '%s'", output.Filter)
	}

	// Verify .data file uses explicit filter
	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	// Guard the indexing below so an empty result fails the test instead of
	// panicking.
	if len(df.Segments) == 0 || len(df.Segments[0].Labels) == 0 {
		t.Fatalf("expected at least one labelled segment in .data file, got %d segments", len(df.Segments))
	}
	if df.Segments[0].Labels[0].Filter != "my-custom-filter" {
		t.Errorf("expected filter 'my-custom-filter' in .data file, got '%s'", df.Segments[0].Labels[0].Filter)
	}
}

// TestCallsFromPreds_NonParsableFilenameNoFilter checks that CallsFromPreds
// fails, and reports a message in the output, when no filter is given and
// the CSV is named "random_name.csv".
func TestCallsFromPreds_NonParsableFilenameNoFilter(t *testing.T) {
	dir := t.TempDir()

	// Predictions CSV with a non-standard name.
	predsFile := filepath.Join(dir, "random_name.csv")
	const rows = "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(predsFile, []byte(rows), 0644); err != nil {
		t.Fatal(err)
	}

	// The WAV file the CSV row refers to.
	createMinimalWAV(t, filepath.Join(dir, "test.wav"), 44100, 10.0)

	result, err := CallsFromPreds(CallsFromPredsInput{
		CSVPath:         predsFile,
		Filter:          "",
		WriteDotData:    true,
		ProgressHandler: nil,
	})

	if err == nil {
		t.Error("expected error for unparsable filename with no filter, got nil")
	}
	if result.Error == nil {
		t.Error("expected error message in output")
	}
}

// createMinimalWAV writes a 16-bit mono PCM WAV file of silence to path for
// use in tests. The file consists of a 44-byte RIFF/WAVE header followed by
// int(sampleRate*duration) zero-valued samples. Any I/O failure aborts the
// calling test via t.Fatal.
func createMinimalWAV(t *testing.T, path string, sampleRate int, duration float64) {
	t.Helper()

	// Two bytes per sample (16-bit mono).
	dataSize := int(float64(sampleRate)*duration) * 2

	hdr := make([]byte, 44)

	// Little-endian field writers for the header.
	le16 := func(off int, v uint16) {
		hdr[off] = byte(v)
		hdr[off+1] = byte(v >> 8)
	}
	le32 := func(off int, v uint32) {
		for i := 0; i < 4; i++ {
			hdr[off+i] = byte(v >> uint(8*i))
		}
	}

	// RIFF container header.
	copy(hdr[0:4], "RIFF")
	le32(4, uint32(36+dataSize))
	copy(hdr[8:12], "WAVE")

	// "fmt " chunk describing the sample format.
	copy(hdr[12:16], "fmt ")
	le32(16, 16)                   // chunk size
	le16(20, 1)                    // audio format: PCM
	le16(22, 1)                    // channels
	le32(24, uint32(sampleRate))   // sample rate
	le32(28, uint32(sampleRate*2)) // byte rate
	le16(32, 2)                    // block align
	le16(34, 16)                   // bits per sample

	// "data" chunk header; the samples follow it.
	copy(hdr[36:40], "data")
	le32(40, uint32(dataSize))

	out, err := os.Create(path)
	if err != nil {
		t.Fatal(err)
	}
	defer out.Close()

	if _, err := out.Write(hdr); err != nil {
		t.Fatal(err)
	}

	// Silence is all zero bytes.
	zeros := make([]byte, dataSize)
	if _, err := out.Write(zeros); err != nil {
		t.Fatal(err)
	}
}
file deletion: calls_from_preds.go (----------)

[6.248737]→[6.504729:504772](∅→∅),[6.504772]→[6.483503:483503](∅→∅)

package tools

import (
"encoding/csv"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"

"skraak/utils"
)

// Constants for clustering algorithm
const (
// Default gap multiplier. CallsFromPreds computes the gap threshold as
// multiplier * clip_duration and uses this value unless the input supplies
// a positive GapMultiplier. (Earlier note: 3 was used for kiwi.)
CLUSTER_GAP_MULTIPLIER = 2
// Fallback minimum-detections setting, used by CallsFromPreds only when the
// input's MinDetections is negative. Clusters with this many detections or
// fewer are dropped: 1 filters out single detections (used for kiwi, whose
// calls are long, ~30s), 0 lets single detections pass through.
MIN_DETECTIONS_PER_CLUSTER = 0
DEFAULT_CERTAINTY = 70 // .data certainty:70
DOT_DATA_WORKERS = 8 // Number of parallel workers for .data file writing
)

// ClusteredCall represents a clustered bird call detection
type ClusteredCall struct {
// File is the audio file the call belongs to.
File string `json:"file"`
// StartTime and EndTime bound the call. When built by clusterDetections,
// StartTime is the first detection's start and EndTime is the last
// detection's start plus the clip duration.
StartTime float64 `json:"start_time"`
EndTime float64 `json:"end_time"`
// EbirdCode identifies the species.
EbirdCode string `json:"ebird_code"`
// Segments is the number of detections merged into this call.
Segments int `json:"segments"`
}

// CallsFromPredsInput defines the input for the calls-from-preds tool
type CallsFromPredsInput struct {
// CSVPath is the predictions CSV to read.
CSVPath string `json:"csv_path"`
// Filter labels the generated calls; when empty, CallsFromPreds tries to
// parse it from the CSV filename and fails if that yields nothing.
Filter string `json:"filter"`
// WriteDotData enables writing .data files for the clustered calls.
WriteDotData bool `json:"write_dot_data"`
// GapMultiplier overrides CLUSTER_GAP_MULTIPLIER when greater than zero.
GapMultiplier int `json:"gap_multiplier"`
// MinDetections is used as given when zero or positive; a negative value
// selects MIN_DETECTIONS_PER_CLUSTER. Clusters with this many detections
// or fewer are discarded.
MinDetections int `json:"min_detections"`
ProgressHandler ProgressHandler `json:"-"` // Optional progress callback (not serialized)
}

// ProgressHandler is a callback function for reporting progress during long
// operations. Its arguments are:
//   - processed: number of items processed so far
//   - total: total number of items to process
//   - message: optional status message
type ProgressHandler func(processed, total int, message string)

// CallsFromPredsOutput defines the output for the calls-from-preds tool
type CallsFromPredsOutput struct {
// Calls holds the clustered calls; TotalCalls is its length.
Calls []ClusteredCall `json:"calls"`
TotalCalls int `json:"total_calls"`
// ClipDuration is end_time minus start_time of the first CSV data row.
ClipDuration float64 `json:"clip_duration"`
// GapThreshold is the gap multiplier times ClipDuration.
GapThreshold float64 `json:"gap_threshold"`
// SpeciesCount maps each ebird code to its number of clustered calls.
SpeciesCount map[string]int `json:"species_count"`
// DataFilesWritten and DataFilesSkipped are only set when .data writing
// was requested.
DataFilesWritten int `json:"data_files_written"`
DataFilesSkipped int `json:"data_files_skipped"`
// Filter is the filter actually used (given or parsed from the filename).
Filter string `json:"filter"`
// Error carries the failure message when CallsFromPreds returns an error;
// it is omitted from the JSON when nil.
Error *string `json:"error,omitempty"`
}

// AviaNZ .data file types

// predFileSpeciesKey groups detections by file and ebird code. It is the key
// type of the detections map built while reading a predictions CSV.
type predFileSpeciesKey struct {
File string
EbirdCode string
}

// CallsFromPreds reads a predictions CSV, clusters the positive detections
// into continuous calls and, when requested, writes .data files for them.
//
// The filter comes from input.Filter or, failing that, from the CSV filename;
// if neither yields one the call fails. On any failure the returned output
// carries the message in its Error field alongside the returned error, and
// keeps whatever fields had been filled in up to that point.
func CallsFromPreds(input CallsFromPredsInput) (CallsFromPredsOutput, error) {
	var output CallsFromPredsOutput

	// fail records msg on the output and returns it together with err.
	fail := func(msg string, err error) (CallsFromPredsOutput, error) {
		output.Error = &msg
		return output, err
	}

	// Resolve the filter: explicit value first, CSV filename second.
	filter := input.Filter
	if filter == "" {
		if filter = ParseFilterFromFilename(input.CSVPath); filter == "" {
			msg := "Filter must be specified via --filter flag or parsable from CSV filename"
			return fail(msg, fmt.Errorf("%s", msg))
		}
	}
	output.Filter = filter

	_, detections, clipDuration, err := readPredCSV(input.CSVPath)
	if err != nil {
		return fail(err.Error(), err)
	}
	output.ClipDuration = clipDuration

	// Clustering parameters: package defaults unless the input overrides them.
	gapMultiplier := CLUSTER_GAP_MULTIPLIER
	if input.GapMultiplier > 0 {
		gapMultiplier = input.GapMultiplier
	}
	minDetections := MIN_DETECTIONS_PER_CLUSTER
	if input.MinDetections >= 0 {
		minDetections = input.MinDetections
	}
	gapThreshold := float64(gapMultiplier) * clipDuration
	output.GapThreshold = gapThreshold

	output.Calls, output.SpeciesCount = clusterDetections(detections, clipDuration, gapThreshold, minDetections)
	output.TotalCalls = len(output.Calls)

	if !input.WriteDotData {
		return output, nil
	}

	written, skipped, err := writeDotFiles(input.CSVPath, filter, output.Calls, input.ProgressHandler)
	if err != nil {
		msg := fmt.Sprintf("Error writing .data files: %v", err)
		return fail(msg, fmt.Errorf("%s", msg))
	}
	output.DataFilesWritten = written
	output.DataFilesSkipped = skipped

	return output, nil
}

// readPredCSV loads a predictions CSV from csvPath. It returns the column
// layout found in the header, the positive detections grouped by file and
// species, and the clip duration. On any failure it returns a zero column
// layout, a nil map, 0 and the error.
func readPredCSV(csvPath string) (predCSVColumns, map[predFileSpeciesKey][]float64, float64, error) {
	var noCols predCSVColumns

	f, err := os.Open(csvPath)
	if err != nil {
		return noCols, nil, 0, fmt.Errorf("failed to open CSV file: %w", err)
	}
	defer func() { _ = f.Close() }()

	r := csv.NewReader(f)
	// Rows are consumed one at a time, so the record slice can be reused.
	r.ReuseRecord = true

	headerRow, err := r.Read()
	if err != nil {
		return noCols, nil, 0, fmt.Errorf("failed to read CSV header: %w", err)
	}

	layout, err := findPredCSVColumns(headerRow)
	if err != nil {
		return noCols, nil, 0, err
	}

	found, clipLen, err := readPredCSVRows(r, layout)
	if err != nil {
		return noCols, nil, 0, err
	}

	return layout, found, clipLen, nil
}

// predCSVColumns holds the column indices for a predictions CSV
type predCSVColumns struct {
// Positions of the "file", "start_time" and "end_time" columns;
// findPredCSVColumns starts them at -1, meaning "not found".
fileIdx int
startTimeIdx int
endTimeIdx int
// ebirdCodes and ebirdIdx are parallel slices: ebirdCodes[i] is the header
// name of a species column and ebirdIdx[i] is that column's position.
ebirdCodes []string
ebirdIdx []int
}

// findPredCSVColumns parses the CSV header to find column indices
func findPredCSVColumns(header []string) (predCSVColumns, error) {
cols := predCSVColumns{
fileIdx: -1,
startTimeIdx: -1,
endTimeIdx: -1,
}

for i, col := range header {
switch col {
case "file":
cols.fileIdx = i
case "start_time":
cols.startTimeIdx = i
case "end_time":
cols.endTimeIdx = i
default:
if ignoredColumns[col] {
continue
}
cols.ebirdCodes = append(cols.ebirdCodes, col)
cols.ebirdIdx = append(cols.ebirdIdx, i)
}
}

if cols.fileIdx == -1 || cols.startTimeIdx == -1 || cols.endTimeIdx == -1 {
return cols, fmt.Errorf("CSV must have 'file', 'start_time', and 'end_time' columns")
}
if len(cols.ebirdCodes) == 0 {
return cols, fmt.Errorf("CSV must have at least one ebird code column")
}

// readPredCSVRows reads every remaining row from reader and returns the
// positive detections grouped by file and species, plus the clip duration.
//
// The clip duration is end_time minus start_time of the first data row; it is
// 0 when there are no data rows. An error is returned when a row cannot be
// read, when any row's start_time is not a valid number, or when the first
// row's end_time is not a valid number — malformed times are reported rather
// than silently treated as 0.
func readPredCSVRows(reader *csv.Reader, cols predCSVColumns) (map[predFileSpeciesKey][]float64, float64, error) {
	detections := make(map[predFileSpeciesKey][]float64)
	clipDuration := 0.0

	for firstRow := true; ; firstRow = false {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			if firstRow {
				return nil, 0, fmt.Errorf("failed to read first CSV row: %w", err)
			}
			return nil, 0, fmt.Errorf("failed to read CSV row: %w", err)
		}

		startTime, err := strconv.ParseFloat(record[cols.startTimeIdx], 64)
		if err != nil {
			return nil, 0, fmt.Errorf("invalid start_time %q: %w", record[cols.startTimeIdx], err)
		}

		// Only the first row's end_time is needed: it fixes the clip duration.
		if firstRow {
			endTime, err := strconv.ParseFloat(record[cols.endTimeIdx], 64)
			if err != nil {
				return nil, 0, fmt.Errorf("invalid end_time %q: %w", record[cols.endTimeIdx], err)
			}
			clipDuration = endTime - startTime
		}

		addDetectionsFromRow(record, cols, startTime, detections)
	}

	return detections, clipDuration, nil
}

// addDetectionsFromRow records startTime in detections for every species
// column of record whose value is exactly "1", keyed by the row's file name
// and that column's ebird code.
func addDetectionsFromRow(record []string, cols predCSVColumns, startTime float64, detections map[predFileSpeciesKey][]float64) {
	file := record[cols.fileIdx]
	for n := range cols.ebirdIdx {
		if record[cols.ebirdIdx[n]] != "1" {
			continue
		}
		k := predFileSpeciesKey{File: file, EbirdCode: cols.ebirdCodes[n]}
		detections[k] = append(detections[k], startTime)
	}
}

// clusterDetections turns per-file, per-species detection start times into
// ClusteredCalls. Each group's start times are sorted in place and split into
// clusters by clusterStartTimes; clusters with minDetections entries or fewer
// are dropped. A surviving cluster runs from its first start time to its last
// start time plus clipDuration.
//
// It returns the calls ordered by file and then start time, and the number of
// calls per ebird code.
func clusterDetections(detections map[predFileSpeciesKey][]float64, clipDuration, gapThreshold float64, minDetections int) ([]ClusteredCall, map[string]int) {
	var calls []ClusteredCall
	counts := make(map[string]int)

	for key, starts := range detections {
		sort.Float64s(starts)

		for _, group := range clusterStartTimes(starts, gapThreshold) {
			n := len(group)
			if n <= minDetections {
				continue
			}
			calls = append(calls, ClusteredCall{
				File:      key.File,
				StartTime: group[0],
				EndTime:   group[n-1] + clipDuration,
				EbirdCode: key.EbirdCode,
				Segments:  n,
			})
			counts[key.EbirdCode]++
		}
	}

	sort.Slice(calls, func(a, b int) bool {
		left, right := calls[a], calls[b]
		if left.File == right.File {
			return left.StartTime < right.StartTime
		}
		return left.File < right.File
	})

	return calls, counts
}

// DirCache is a one-shot index of the regular entries in a directory, used to
// find files by basename without rescanning the directory for each lookup.
// Keys are lowercased basenames (name minus its final extension), so lookups
// are case-insensitive. Safe for concurrent read-only use after construction.
type DirCache struct {
	dir    string
	wavMap map[string]string // lowercase basename -> original filename, WAV files only (e.g. "20230610_150000" -> "20230610_150000.WAV")
	dirMap map[string]string // lowercase basename -> original filename, any file (used by from-raven for .selections.txt etc.)
}

// NewDirCache scans dir once and builds the cache. Sub-directories are
// ignored. If the directory cannot be read, an empty cache is returned, so
// every lookup on it reports "not found".
func NewDirCache(dir string) *DirCache {
	cache := &DirCache{dir: dir}

	entries, err := os.ReadDir(dir)
	if err != nil {
		cache.wavMap = make(map[string]string)
		cache.dirMap = make(map[string]string)
		return cache
	}

	cache.wavMap = make(map[string]string, len(entries))
	cache.dirMap = make(map[string]string, len(entries))
	for _, e := range entries {
		if e.IsDir() {
			continue
		}
		name := e.Name()
		ext := filepath.Ext(name)
		key := strings.ToLower(strings.TrimSuffix(name, ext))

		cache.dirMap[key] = name
		if strings.EqualFold(ext, ".wav") {
			cache.wavMap[key] = name
		}
	}
	return cache
}

// FindWAV looks up a WAV file by basename, ignoring case.
// It returns the full path using the filename's original case, or an empty
// string when no WAV file with that basename was seen.
func (dc *DirCache) FindWAV(baseName string) string {
	name, found := dc.wavMap[strings.ToLower(baseName)]
	if !found {
		return ""
	}
	return filepath.Join(dc.dir, name)
}

// FindFile looks up a file of any type by basename, ignoring case.
// It returns the full path using the filename's original case, or an empty
// string when no file with that basename was seen.
func (dc *DirCache) FindFile(baseName string) string {
	name, found := dc.dirMap[strings.ToLower(baseName)]
	if !found {
		return ""
	}
	return filepath.Join(dc.dir, name)
}

// findWAVFile finds a WAV file in the directory with case-insensitive matching.
// baseName is the filename without extension (e.g., "20230610_150000").
//
// Both the basename and the ".wav" extension are matched case-insensitively,
// in line with DirCache.FindWAV. An entry whose basename matches exactly is
// preferred; otherwise the first entry that matches when case is ignored is
// used. Directories are never matched.
//
// Returns the full path with correct (on-disk) case, or empty string if the
// directory cannot be read or no matching file exists.
//
// Deprecated: Use DirCache.FindWAV for batch operations to avoid repeated directory scans.
func findWAVFile(dir, baseName string) string {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return ""
	}

	fallback := "" // first case-insensitive match, used only if no exact match exists
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		ext := filepath.Ext(name)
		if !strings.EqualFold(ext, ".wav") {
			continue
		}
		stem := strings.TrimSuffix(name, ext)
		if stem == baseName {
			return filepath.Join(dir, name)
		}
		if fallback == "" && strings.EqualFold(stem, baseName) {
			fallback = name
		}
	}

	if fallback == "" {
		return ""
	}
	return filepath.Join(dir, fallback)
}

// writeDotFiles writes one AviaNZ .data file per audio file that has calls.
// Calls are grouped by the base name of their File field and the WAV files are
// looked up in the directory that contains csvPath. Batches of fewer than 10
// files are handled sequentially; larger ones go through the worker pool.
// Returns the number of .data files written, the number skipped, and an error.
func writeDotFiles(csvPath, filter string, calls []ClusteredCall, progress ProgressHandler) (int, int, error) {
	grouped := make(map[string][]ClusteredCall)
	for i := range calls {
		name := filepath.Base(calls[i].File)
		grouped[name] = append(grouped[name], calls[i])
	}

	// Announce the total before any file is touched.
	if progress != nil {
		progress(0, len(grouped), "Processing WAV files")
	}

	// The WAV files are expected next to the CSV.
	baseDir := filepath.Dir(csvPath)

	if len(grouped) >= 10 {
		return writeDotFilesParallel(baseDir, filter, grouped, progress)
	}
	// Small batch: not worth the goroutine overhead.
	return writeDotFilesSequential(baseDir, filter, grouped, progress)
}

// dotDataJob is the unit of work handed to a dotDataWorker: one audio file
// name together with the clustered calls grouped under that name.
type dotDataJob struct {
// filename is the audio file name the calls were grouped by
filename string
// fileCalls holds every call that belongs to filename
fileCalls []ClusteredCall
}

// dotDataResult is what a dotDataWorker reports back for one dotDataJob.
type dotDataResult struct {
// filename echoes the job's filename
filename string
// written is true only when the .data file was written successfully
written bool
// err is set when writing the .data file failed; skipped files report nil
err error
}

// writeDotFilesSequential writes the .data files one after another (used for
// small batches). A file is counted as skipped when its WAV cannot be found in
// csvDir or its WAV header cannot be parsed. A failure to write a .data file
// aborts the run and is returned together with the counts so far.
// The progress handler, when set, is notified after every file.
func writeDotFilesSequential(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
	var written, skipped, done int
	total := len(callsByFile)

	// tick marks one more file as handled and notifies the optional handler.
	tick := func() {
		done++
		if progress != nil {
			progress(done, total, "")
		}
	}

	for filename, fileCalls := range callsByFile {
		// Resolve the WAV file with its on-disk spelling.
		stem := strings.TrimSuffix(filename, filepath.Ext(filename))
		wavPath := findWAVFile(csvDir, stem)
		if wavPath == "" {
			skipped++
			tick()
			continue
		}

		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
		if err != nil {
			skipped++
			tick()
			continue
		}

		meta, segments := buildAviaNZMetaAndSegments(fileCalls, filter, duration, sampleRate)

		dataPath := wavPath + ".data"
		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
			return written, skipped, fmt.Errorf("failed to write %s: %w", dataPath, err)
		}

		written++
		tick()
	}

	return written, skipped, nil
}

// writeDotFilesParallel writes the .data files through a pool of
// DOT_DATA_WORKERS goroutines. Every file is attempted even if an earlier one
// failed; the first error seen is returned after all results are in.
// A result that was not written counts as skipped. The progress handler, when
// set, is notified once per collected result.
func writeDotFilesParallel(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
	total := len(callsByFile)

	// Both channels can hold every item, so neither side ever blocks on them.
	jobs := make(chan dotDataJob, total)
	results := make(chan dotDataResult, total)

	var workers sync.WaitGroup
	for range DOT_DATA_WORKERS {
		workers.Add(1)
		go dotDataWorker(csvDir, filter, jobs, results, &workers)
	}

	for name, group := range callsByFile {
		jobs <- dotDataJob{filename: name, fileCalls: group}
	}
	close(jobs)

	// Close results once the last worker is done so the loop below ends.
	go func() {
		workers.Wait()
		close(results)
	}()

	var (
		written, skipped int
		firstErr         error
		processed        atomic.Int32
	)
	for res := range results {
		if firstErr == nil && res.err != nil {
			firstErr = res.err
		}

		switch {
		case res.written:
			written++
		default:
			skipped++
		}

		if progress == nil {
			continue
		}
		progress(int(processed.Add(1)), total, "")
	}

	return written, skipped, firstErr
}

// dotDataWorker drains the jobs channel, writing one .data file per job and
// reporting exactly one dotDataResult per job on results. It calls wg.Done
// when the jobs channel is closed and drained.
//
// The directory is scanned once per worker through a DirCache instead of once
// per job; the parallel path is only used for larger batches, where a scan per
// file makes the work grow with files x directory entries. As with
// DirCache.FindWAV, the WAV lookup is case-insensitive on the basename.
//
// A job is reported as not written with a nil error when its WAV file is
// missing or its header cannot be parsed; a failed write is reported with the
// wrapped error.
func dotDataWorker(csvDir, filter string, jobs <-chan dotDataJob, results chan<- dotDataResult, wg *sync.WaitGroup) {
	defer wg.Done()

	cache := NewDirCache(csvDir)

	for job := range jobs {
		// Find WAV file with correct case
		baseName := strings.TrimSuffix(job.filename, filepath.Ext(job.filename))
		wavPath := cache.FindWAV(baseName)
		if wavPath == "" {
			results <- dotDataResult{filename: job.filename, written: false, err: nil}
			continue
		}

		dataPath := wavPath + ".data"

		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
		if err != nil {
			// Unreadable WAV header: skip the file rather than fail the batch.
			results <- dotDataResult{filename: job.filename, written: false, err: nil}
			continue
		}

		// Build segments and metadata
		meta, segments := buildAviaNZMetaAndSegments(job.fileCalls, filter, duration, sampleRate)

		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
			results <- dotDataResult{filename: job.filename, written: false, err: fmt.Errorf("failed to write %s: %w", dataPath, err)}
			continue
		}

		results <- dotDataResult{filename: job.filename, written: true, err: nil}
	}
}

// buildAviaNZMetaAndSegments turns the calls of one audio file into the
// metadata header and the segment list of a .data file. Every call becomes a
// full-band segment (0 to sampleRate) carrying a single label with the call's
// species code, DEFAULT_CERTAINTY and the given filter. The metadata uses
// operator "Auto", reviewer "None" and the supplied duration.
func buildAviaNZMetaAndSegments(calls []ClusteredCall, filter string, duration float64, sampleRate int) (AviaNZMeta, []AviaNZSegment) {
	var segments []AviaNZSegment
	for i := range calls {
		c := &calls[i]
		label := AviaNZLabel{
			Species:   c.EbirdCode,
			Certainty: DEFAULT_CERTAINTY,
			Filter:    filter,
		}
		// Segment layout: [start, end, freq_low, freq_high, labels]
		segments = append(segments, AviaNZSegment{
			c.StartTime,
			c.EndTime,
			0,          // freq_low
			sampleRate, // freq_high (full band)
			[]AviaNZLabel{label},
		})
	}

	reviewer := "None"
	return AviaNZMeta{
		Operator: "Auto",
		Reviewer: &reviewer,
		Duration: duration,
	}, segments
}

// writeAviaNZDataFile writes data as compact JSON to a new file at path,
// truncating any file already there (it does not check for existing files).
//
// Returns an error if the file cannot be created, the data cannot be encoded,
// or the file cannot be closed. The close error is reported because for a
// freshly written file it can be the only sign that the data did not reach
// the disk.
func writeAviaNZDataFile(path string, data []any) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}
	defer func() {
		// Keep an earlier encode error; otherwise surface the close failure.
		if cerr := file.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("failed to close file: %w", cerr)
		}
	}()

	encoder := json.NewEncoder(file)
	encoder.SetIndent("", "") // No indentation for compact output

	if err := encoder.Encode(data); err != nil {
		return fmt.Errorf("failed to encode JSON: %w", err)
	}

	return nil
}

// writeDotDataFileSafe writes newSegments to the .data file at path without
// ever overwriting labels that are already there:
//   - no file at path (Stat fails): a new file is written from meta and newSegments
//   - file exists but cannot be parsed: error, nothing is written
//   - file exists and some segment already carries a label for filter: error,
//     nothing is written
//   - file exists without that filter: the new segments are appended, all
//     segments are sorted by start time, and the file is rewritten
func writeDotDataFileSafe(path string, newSegments []AviaNZSegment, filter string, meta AviaNZMeta) error {
	if _, statErr := os.Stat(path); statErr != nil {
		// Nothing to protect: create the file from scratch.
		return writeAviaNZDataFile(path, buildDataFileFromSegments(meta, newSegments))
	}

	existing, err := utils.ParseDataFile(path)
	if err != nil {
		return fmt.Errorf("cannot parse existing %s: %w (refusing to clobber)", path, err)
	}

	// Any label from the same filter means this file was already processed.
	for _, current := range existing.Segments {
		if current.HasFilterLabel(filter) {
			return fmt.Errorf("%s already contains filter '%s' (refusing to clobber)", path, filter)
		}
	}

	// Different filter: merging is safe.
	for _, added := range newSegments {
		existing.Segments = append(existing.Segments, convertAviaNZSegment(added, filter))
	}

	sort.Slice(existing.Segments, func(a, b int) bool {
		return existing.Segments[a].StartTime < existing.Segments[b].StartTime
	})

	return existing.Write(path)
}

// convertAviaNZSegment converts an AviaNZSegment
// ([start, end, freq_low, freq_high, labels]) into a *utils.Segment.
// Every label keeps its species and certainty but gets the given filter.
// Frequencies may be stored as int or float64; any other type yields 0.
// Start, end and labels are type-asserted and panic if they have another type.
func convertAviaNZSegment(seg AviaNZSegment, filter string) *utils.Segment {
	// toHz accepts the two numeric representations a frequency can have.
	toHz := func(v any) float64 {
		switch n := v.(type) {
		case int:
			return float64(n)
		case float64:
			return n
		}
		return 0
	}

	source := seg[4].([]AviaNZLabel)
	converted := make([]*utils.Label, 0, len(source))
	for _, lbl := range source {
		converted = append(converted, &utils.Label{
			Species:   lbl.Species,
			Certainty: lbl.Certainty,
			Filter:    filter,
		})
	}

	low := toHz(seg[2])
	high := toHz(seg[3])

	return &utils.Segment{
		StartTime: seg[0].(float64),
		EndTime:   seg[1].(float64),
		FreqLow:   low,
		FreqHigh:  high,
		Labels:    converted,
	}
}

// buildDataFileFromSegments assembles the top-level array of a .data file:
// the metadata object first, followed by each segment in order.
func buildDataFileFromSegments(meta AviaNZMeta, segments []AviaNZSegment) []any {
	out := make([]any, 1, 1+len(segments))
	out[0] = meta
	for i := range segments {
		out = append(out, segments[i])
	}
	return out
}

// ParseFilterFromFilename extracts the filter name from a preds CSV path.
// The base name, minus a trailing ".csv", must consist of exactly three
// underscore-separated parts; the middle part is the filter:
// "predsST_opensoundscape-kiwi-1.2_2025-11-12.csv" -> "opensoundscape-kiwi-1.2"
// Any other shape yields the empty string.
func ParseFilterFromFilename(csvPath string) string {
	stem := strings.TrimSuffix(filepath.Base(csvPath), ".csv")

	// Exactly two separators means exactly three parts.
	if strings.Count(stem, "_") != 2 {
		return ""
	}

	first := strings.Index(stem, "_")
	last := strings.LastIndex(stem, "_")
	return stem[first+1 : last]
}

// clusterStartTimes splits startTimes into runs of consecutive values. A value
// stays in the current run while its distance to the previous value is
// <= gapThreshold; a larger gap starts a new run. Each run is returned as its
// own slice, independent of the input. Empty input yields nil.
func clusterStartTimes(startTimes []float64, gapThreshold float64) [][]float64 {
	if len(startTimes) == 0 {
		return nil
	}

	var clusters [][]float64
	begin := 0 // index where the current run starts
	for i := 1; i <= len(startTimes); i++ {
		if i < len(startTimes) && startTimes[i]-startTimes[i-1] <= gapThreshold {
			continue // still inside the current run
		}
		// Either the gap was too large or the input ended: close the run.
		clusters = append(clusters, append([]float64(nil), startTimes[begin:i]...))
		begin = i
	}

	return clusters
}
return cols, nil
}
ignoredColumns := map[string]bool{"NotKiwi": true, "0.0": true}
file deletion: calls_from_birda_raven_test.go (----------)

[6.248737]→[6.515903:515957](∅→∅),[6.515957]→[6.504774:504774](∅→∅)

package tools

import (
"os"
"path/filepath"
"testing"

"skraak/utils"
)

// ============================================
// BirdNET Tests
// ============================================

// TestCallsFromBirda_NewDataFile checks that a single BirdNET results file
// produces a new .data file next to the WAV, with one segment labelled with
// the "BirdNET" filter and the confidence 0.85 mapped to certainty 85.
func TestCallsFromBirda_NewDataFile(t *testing.T) {
	tmpDir := t.TempDir()

	// Create a minimal WAV file
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)

	// Create BirdNET results file. The header starts with a UTF-8 BOM and the
	// File column points at a path that does not exist.
	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Turdus migratorius,American Robin,0.85,/some/path/test.WAV\n"
	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromBirdaInput{
		File: birdaPath,
	}

	output, err := CallsFromBirda(input)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "BirdNET" {
		t.Errorf("expected filter 'BirdNET', got '%s'", output.Filter)
	}
	if output.TotalCalls != 1 {
		t.Errorf("expected 1 call, got %d", output.TotalCalls)
	}

	// Verify .data file was created
	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	// Fatal rather than Error: the checks below index into the slices and
	// would panic instead of reporting a clean failure.
	if len(df.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df.Segments))
	}
	if len(df.Segments[0].Labels) == 0 {
		t.Fatal("expected at least 1 label on the segment, got none")
	}
	label := df.Segments[0].Labels[0]
	if label.Filter != "BirdNET" {
		t.Errorf("expected filter 'BirdNET', got '%s'", label.Filter)
	}
	if label.Certainty != 85 {
		t.Errorf("expected certainty 85, got %d", label.Certainty)
	}
}

// TestCallsFromBirda_ExistingSameFilter checks that importing BirdNET results
// is refused when the .data file already holds a "BirdNET" label.
func TestCallsFromBirda_ExistingSameFilter(t *testing.T) {
	root := t.TempDir()
	mustWrite := func(path, content string) {
		if err := os.WriteFile(path, []byte(content), 0644); err != nil {
			t.Fatal(err)
		}
	}

	wav := filepath.Join(root, "test.WAV")
	createMinimalWAV(t, wav, 16000, 60.0)

	// Pre-existing .data file that already uses the BirdNET filter.
	mustWrite(wav+".data", `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing Bird", "certainty": 90, "filter": "BirdNET"}]]]`)

	results := filepath.Join(root, "test.BirdNET.results.csv")
	mustWrite(results, "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,New Bird,New Bird,0.85,test.WAV\n")

	out, callErr := CallsFromBirda(CallsFromBirdaInput{File: results})

	if callErr == nil {
		t.Error("expected error for same filter, got nil")
	}
	if out.Error == nil {
		t.Error("expected error message in output")
	}
}

// TestCallsFromBirda_ExistingDifferentFilter checks that BirdNET results are
// merged into a .data file whose only label comes from another filter,
// leaving two segments.
func TestCallsFromBirda_ExistingDifferentFilter(t *testing.T) {
	root := t.TempDir()
	mustWrite := func(path, content string) {
		if err := os.WriteFile(path, []byte(content), 0644); err != nil {
			t.Fatal(err)
		}
	}

	wav := filepath.Join(root, "test.WAV")
	createMinimalWAV(t, wav, 16000, 60.0)

	// Existing segment labelled by the "Manual" filter.
	dataFile := wav + ".data"
	mustWrite(dataFile, `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "Manual"}]]]`)

	results := filepath.Join(root, "test.BirdNET.results.csv")
	mustWrite(results, "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n")

	out, callErr := CallsFromBirda(CallsFromBirdaInput{File: results})
	if callErr != nil {
		t.Fatalf("unexpected error: %v", callErr)
	}
	if out.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", out.DataFilesWritten)
	}

	merged, parseErr := utils.ParseDataFile(dataFile)
	if parseErr != nil {
		t.Fatalf("failed to parse .data file: %v", parseErr)
	}
	if n := len(merged.Segments); n != 2 {
		t.Errorf("expected 2 segments after merge, got %d", n)
	}
}

// TestCallsFromBirda_DeleteOption checks that with Delete set the BirdNET
// results file is removed after processing and counted in FilesDeleted.
func TestCallsFromBirda_DeleteOption(t *testing.T) {
	root := t.TempDir()

	createMinimalWAV(t, filepath.Join(root, "test.WAV"), 16000, 60.0)

	results := filepath.Join(root, "test.BirdNET.results.csv")
	csvBody := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n"
	if err := os.WriteFile(results, []byte(csvBody), 0644); err != nil {
		t.Fatal(err)
	}

	out, callErr := CallsFromBirda(CallsFromBirdaInput{File: results, Delete: true})
	if callErr != nil {
		t.Fatalf("unexpected error: %v", callErr)
	}
	if out.FilesDeleted != 1 {
		t.Errorf("expected 1 file deleted, got %d", out.FilesDeleted)
	}

	// The source file must be gone from disk.
	_, statErr := os.Stat(results)
	if !os.IsNotExist(statErr) {
		t.Error("expected BirdNET file to be deleted")
	}
}

func TestCallsFromBirda_FolderMode(t *testing.T) {
tmpDir := t.TempDir()

for i := range 2 {
wavPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".WAV")
createMinimalWAV(t, wavPath, 16000, 60.0)

birdaPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".BirdNET.results.csv")
birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Bird,Bird,0.85,test.WAV\n"
if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
t.Fatal(err)
}
}

input := CallsFromBirdaInput{Folder: tmpDir}
output, err := CallsFromBirda(input)

if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if output.FilesProcessed != 2 {
t.Errorf("expected 2 files processed, got %d", output.FilesProcessed)
}
if output.DataFilesWritten != 2 {
t.Errorf("expected 2 data files written, got %d", output.DataFilesWritten)
}
}

// ============================================
// Raven Tests
// ============================================

// TestCallsFromRaven_NewDataFile checks that a single Raven selection table
// produces a new .data file whose segment keeps the selection's frequency
// band (1000-5000 Hz) and that the reported filter is "Raven".
func TestCallsFromRaven_NewDataFile(t *testing.T) {
	tmpDir := t.TempDir()

	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)

	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromRavenInput{File: ravenPath}
	output, err := CallsFromRaven(input)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "Raven" {
		t.Errorf("expected filter 'Raven', got '%s'", output.Filter)
	}

	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	// Guard the index below so a missing segment fails the test instead of
	// panicking.
	if len(df.Segments) == 0 {
		t.Fatal("expected at least 1 segment, got none")
	}
	if df.Segments[0].FreqLow != 1000 {
		t.Errorf("expected freq_low 1000, got %f", df.Segments[0].FreqLow)
	}
	if df.Segments[0].FreqHigh != 5000 {
		t.Errorf("expected freq_high 5000, got %f", df.Segments[0].FreqHigh)
	}
}

// TestCallsFromRaven_ExistingSameFilter checks that importing a Raven
// selection table is refused when the .data file already holds a "Raven"
// label.
func TestCallsFromRaven_ExistingSameFilter(t *testing.T) {
	root := t.TempDir()
	mustWrite := func(path, content string) {
		if err := os.WriteFile(path, []byte(content), 0644); err != nil {
			t.Fatal(err)
		}
	}

	wav := filepath.Join(root, "test.WAV")
	createMinimalWAV(t, wav, 16000, 60.0)

	// Pre-existing .data file that already uses the Raven filter.
	mustWrite(wav+".data", `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing", "certainty": 90, "filter": "Raven"}]]]`)

	selections := filepath.Join(root, "test.Table.1.selections.txt")
	mustWrite(selections, "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tNew\n")

	out, callErr := CallsFromRaven(CallsFromRavenInput{File: selections})

	if callErr == nil {
		t.Error("expected error for same filter, got nil")
	}
	if out.Error == nil {
		t.Error("expected error message in output")
	}
}

// TestCallsFromRaven_ExistingDifferentFilter checks that a Raven selection is
// merged into a .data file whose only label comes from the "BirdNET" filter,
// leaving two segments.
func TestCallsFromRaven_ExistingDifferentFilter(t *testing.T) {
	root := t.TempDir()
	mustWrite := func(path, content string) {
		if err := os.WriteFile(path, []byte(content), 0644); err != nil {
			t.Fatal(err)
		}
	}

	wav := filepath.Join(root, "test.WAV")
	createMinimalWAV(t, wav, 16000, 60.0)

	// Existing segment labelled by a different filter.
	dataFile := wav + ".data"
	mustWrite(dataFile, `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "BirdNET"}]]]`)

	selections := filepath.Join(root, "test.Table.1.selections.txt")
	mustWrite(selections, "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tMorepork\n")

	out, callErr := CallsFromRaven(CallsFromRavenInput{File: selections})
	if callErr != nil {
		t.Fatalf("unexpected error: %v", callErr)
	}
	if out.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", out.DataFilesWritten)
	}

	merged, parseErr := utils.ParseDataFile(dataFile)
	if parseErr != nil {
		t.Fatalf("failed to parse .data file: %v", parseErr)
	}
	if n := len(merged.Segments); n != 2 {
		t.Errorf("expected 2 segments after merge, got %d", n)
	}
}

// TestCallsFromRaven_DeleteOption checks that with Delete set the Raven
// selection file is removed after processing and counted in FilesDeleted.
func TestCallsFromRaven_DeleteOption(t *testing.T) {
	root := t.TempDir()

	createMinimalWAV(t, filepath.Join(root, "test.WAV"), 16000, 60.0)

	selections := filepath.Join(root, "test.Table.1.selections.txt")
	table := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
	if err := os.WriteFile(selections, []byte(table), 0644); err != nil {
		t.Fatal(err)
	}

	out, callErr := CallsFromRaven(CallsFromRavenInput{File: selections, Delete: true})
	if callErr != nil {
		t.Fatalf("unexpected error: %v", callErr)
	}
	if out.FilesDeleted != 1 {
		t.Errorf("expected 1 file deleted, got %d", out.FilesDeleted)
	}

	// The source file must be gone from disk.
	_, statErr := os.Stat(selections)
	if !os.IsNotExist(statErr) {
		t.Error("expected Raven file to be deleted")
	}
}

// TestCallsFromRaven_MultipleSelections checks that three selections of three
// different species are reported as three calls, one per species.
func TestCallsFromRaven_MultipleSelections(t *testing.T) {
	root := t.TempDir()

	createMinimalWAV(t, filepath.Join(root, "test.WAV"), 16000, 60.0)

	selections := filepath.Join(root, "test.Table.1.selections.txt")
	table := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n2\tSpectrogram 1\t1\t10.0\t15.0\t2000\t6000\tMorepork\n3\tSpectrogram 1\t1\t20.0\t25.0\t1500\t4500\tTui\n"
	if err := os.WriteFile(selections, []byte(table), 0644); err != nil {
		t.Fatal(err)
	}

	out, callErr := CallsFromRaven(CallsFromRavenInput{File: selections})
	if callErr != nil {
		t.Fatalf("unexpected error: %v", callErr)
	}
	if out.TotalCalls != 3 {
		t.Errorf("expected 3 calls, got %d", out.TotalCalls)
	}

	counts := out.SpeciesCount
	allOnce := counts["Kiwi"] == 1 && counts["Morepork"] == 1 && counts["Tui"] == 1
	if !allOnce {
		t.Errorf("unexpected species count: %v", out.SpeciesCount)
	}
}
file deletion: calls_from_birda.go (----------)

[6.248737]→[6.529042:529085](∅→∅),[6.529085]→[6.515959:515959](∅→∅)

package tools

import (
"encoding/csv"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"

"skraak/utils"
)

// CallsFromBirdaInput defines the input for the calls-from-birda tool.
// It is converted directly to CallsFromSourceInput, so the two types must
// keep the same fields.
type CallsFromBirdaInput struct {
// Folder is a directory to search for BirdNET results files
Folder string `json:"folder"`
// File is the path of a single BirdNET results file
File string `json:"file"`
// Delete requests removal of each results file after it has been processed
Delete bool `json:"delete"`
ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
}

// CallsFromBirdaOutput defines the output for the calls-from-birda tool.
// Every field is copied from the common output of callsFromSource.
type CallsFromBirdaOutput struct {
// Calls lists the calls that were imported
Calls []ClusteredCall `json:"calls"`
// TotalCalls is the number of imported calls
TotalCalls int `json:"total_calls"`
// SpeciesCount maps a species name to its number of calls
SpeciesCount map[string]int `json:"species_count"`
// DataFilesWritten counts the .data files that were written
DataFilesWritten int `json:"data_files_written"`
// DataFilesSkipped counts the results files for which no .data file was written
DataFilesSkipped int `json:"data_files_skipped"`
// FilesProcessed counts the results files that were handled
FilesProcessed int `json:"files_processed"`
// FilesDeleted counts the results files removed because Delete was set
FilesDeleted int `json:"files_deleted"`
// Filter is the filter name written into the labels ("BirdNET" for this tool)
Filter string `json:"filter"`
// Error carries the error message, if any; omitted from JSON when nil
Error *string `json:"error,omitempty"`
}

// birdaSource implements CallSource for BirdNET results files
type birdaSource struct{}

// Name returns the source name, which is also used as the filter name.
func (birdaSource) Name() string { return "BirdNET" }

// FindFiles returns the full paths of all regular entries in folder whose
// name ends in ".BirdNET.results.csv". Directories are skipped even when
// their name matches, because they can never be parsed as a results file.
// Returns the error from reading the directory unchanged.
func (birdaSource) FindFiles(folder string) ([]string, error) {
	var files []string

	entries, err := os.ReadDir(folder)
	if err != nil {
		return nil, err
	}

	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		if strings.HasSuffix(name, ".BirdNET.results.csv") {
			files = append(files, filepath.Join(folder, name))
		}
	}

	return files, nil
}

commonOutput, err := callsFromSource(src, commonInput)

// Convert to Birda-specific output type
var output CallsFromBirdaOutput
output.Calls = commonOutput.Calls
output.TotalCalls = commonOutput.TotalCalls
output.SpeciesCount = commonOutput.SpeciesCount
output.DataFilesWritten = commonOutput.DataFilesWritten
output.DataFilesSkipped = commonOutput.DataFilesSkipped
output.FilesProcessed = commonOutput.FilesProcessed
output.FilesDeleted = commonOutput.FilesDeleted
output.Filter = commonOutput.Filter
output.Error = commonOutput.Error
return output, err
}

// BirdNETDetection represents a single BirdNET detection, i.e. one data row
// of a BirdNET results CSV.
type BirdNETDetection struct {
// StartTime is parsed from the "Start (s)" column
StartTime float64
// EndTime is parsed from the "End (s)" column
EndTime float64
// NOTE(review): ScientificName is not filled in by readBirdaDetections as
// visible here — confirm whether any other code sets it
ScientificName string
// CommonName is taken from the "Common name" column and used as the label species
CommonName string
// Confidence is parsed from the "Confidence" column; it is treated as a
// 0.0-1.0 value when converted to a certainty
Confidence float64
// WAVPath is taken from the optional "File" column; empty when absent
WAVPath string
}

// birdaColumnIndices holds the parsed column positions from a BirdNET CSV header.
// An index of -1 means the column was not found in the header. All columns
// except the file column are required by parseBirdaCSVHeader.
type birdaColumnIndices struct {
// startIdx is the position of the "Start (s)" column
startIdx int
// endIdx is the position of the "End (s)" column
endIdx int
// commonNameIdx is the position of the "Common name" column
commonNameIdx int
// confidenceIdx is the position of the "Confidence" column
confidenceIdx int
// fileIdx is the position of the optional "File" column
fileIdx int
}

// parseBirdaCSVHeader reads the CSV header row and returns column indices.
func parseBirdaCSVHeader(reader *csv.Reader) (birdaColumnIndices, error) {
header, err := reader.Read()
if err != nil {
return birdaColumnIndices{}, fmt.Errorf("failed to read header: %w", err)
}

idx := birdaColumnIndices{startIdx: -1, endIdx: -1, commonNameIdx: -1, confidenceIdx: -1, fileIdx: -1}
for i, col := range header {
col = strings.TrimPrefix(col, "\ufeff")
switch col {
case "Start (s)":
idx.startIdx = i
case "End (s)":
idx.endIdx = i
case "Common name":
idx.commonNameIdx = i
case "Confidence":
idx.confidenceIdx = i
case "File":
idx.fileIdx = i
}
}

if idx.startIdx == -1 || idx.endIdx == -1 || idx.commonNameIdx == -1 || idx.confidenceIdx == -1 {
return birdaColumnIndices{}, fmt.Errorf("missing required columns in BirdNET file")
}

// readBirdaDetections reads all detection records from a BirdNET CSV.
func readBirdaDetections(reader *csv.Reader, idx birdaColumnIndices) ([]BirdNETDetection, error) {
var detections []BirdNETDetection
for {
record, err := reader.Read()
if err == io.EOF {
break
}
if err != nil {
return nil, fmt.Errorf("failed to read record: %w", err)
}

var det BirdNETDetection
startTime, perr := strconv.ParseFloat(record[idx.startIdx], 64)
if perr != nil {
return nil, fmt.Errorf("failed to parse start time %q: %w", record[idx.startIdx], perr)
}
det.StartTime = startTime

endTime, perr := strconv.ParseFloat(record[idx.endIdx], 64)
if perr != nil {
return nil, fmt.Errorf("failed to parse end time %q: %w", record[idx.endIdx], perr)
}
det.EndTime = endTime

det.CommonName = record[idx.commonNameIdx]

confidence, perr := strconv.ParseFloat(record[idx.confidenceIdx], 64)
if perr != nil {
return nil, fmt.Errorf("failed to parse confidence %q: %w", record[idx.confidenceIdx], perr)
}
det.Confidence = confidence

if idx.fileIdx >= 0 && idx.fileIdx < len(record) {
det.WAVPath = record[idx.fileIdx]
}

detections = append(detections, det)
}

// resolveBirdaWAVPath finds the WAV file associated with a BirdNET results file.
func resolveBirdaWAVPath(birdaFile string, firstWAVPath string, cache *DirCache) string {
if firstWAVPath != "" {
if _, err := os.Stat(firstWAVPath); err == nil {
return firstWAVPath
}
}

dir := filepath.Dir(birdaFile)
base := filepath.Base(birdaFile)
baseName := strings.TrimSuffix(base, ".BirdNET.results.csv")

if cache != nil {
return cache.FindWAV(baseName)
}

// processBirdaFileCached processes a single BirdNET results file using a DirCache for WAV lookup.
//
// It parses the CSV header and all detection rows, reads the sample rate and
// duration from the WAV header, and writes (or merges into) the ".data" file
// next to the WAV via writeDotDataFileSafe with filter "BirdNET".
//
// Return values, as used by the return statements below:
//   - (calls, true, false, nil) when the .data file was written; one call per
//     detection, with the common name stored in EbirdCode
//   - (nil, false, true, nil) when the file is skipped: no detections, no WAV
//     path, or an unreadable WAV header
//   - (nil, false, false, err) when opening, parsing or writing fails
//
// NOTE(review): wavPath is used below but is not assigned anywhere in this
// span as shown; the statement that resolves it (via resolveBirdaWAVPath)
// appears to have been displaced in this listing — confirm against the
// original file.
func processBirdaFileCached(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
file, err := os.Open(birdaFile)
if err != nil {
return nil, false, false, fmt.Errorf("failed to open file: %w", err)
}
// The file is only read, so a close error is deliberately ignored.
defer func() { _ = file.Close() }()

reader := csv.NewReader(file)

idx, err := parseBirdaCSVHeader(reader)
if err != nil {
return nil, false, false, err
}

detections, err := readBirdaDetections(reader, idx)
if err != nil {
return nil, false, false, err
}
// A results file without data rows is skipped, not an error.
if len(detections) == 0 {
return nil, false, true, nil
}

// No matching WAV file: skip.
if wavPath == "" {
return nil, false, true, nil
}

sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
if err != nil {
// Unreadable WAV header: skip the file rather than fail the run.
return nil, false, true, nil
}

dataPath := wavPath + ".data"
segments := buildBirdNETSegments(detections, sampleRate)

meta := AviaNZMeta{Operator: "BirdNET", Duration: duration}
reviewer := "None"
meta.Reviewer = &reviewer

if err := writeDotDataFileSafe(dataPath, segments, "BirdNET", meta); err != nil {
return nil, false, false, err
}

// Report each detection as its own single-segment call.
var calls []ClusteredCall
for _, det := range detections {
calls = append(calls, ClusteredCall{
File: wavPath,
StartTime: det.StartTime,
EndTime: det.EndTime,
EbirdCode: det.CommonName,
Segments: 1,
})
}

return calls, true, false, nil
}

// buildBirdNETSegments converts BirdNET detections to AviaNZ segments.
// Each detection becomes one full-band segment (0 to sampleRate) with a single
// label: the common name as species, filter "BirdNET", and the confidence
// converted to a certainty.
//
// The confidence (0.0-1.0) is scaled to 0-100, rounded to the nearest integer
// and clamped to that range. Rounding rather than truncating matters because
// many decimal confidences are not exact in binary floating point: 0.29*100
// evaluates to 28.999999999999996, which truncation would turn into 28.
func buildBirdNETSegments(detections []BirdNETDetection, sampleRate int) []AviaNZSegment {
	var segments []AviaNZSegment

	for _, det := range detections {
		// Adding 0.5 before truncating rounds half up; values below zero are
		// caught by the clamp.
		certainty := min(max(int(det.Confidence*100+0.5), 0), 100)

		labels := []AviaNZLabel{
			{
				Species:   det.CommonName,
				Certainty: certainty,
				Filter:    "BirdNET",
			},
		}

		// Segment layout: [start, end, freq_low, freq_high, labels]
		segment := AviaNZSegment{
			det.StartTime,
			det.EndTime,
			0,          // freq_low
			sampleRate, // freq_high (full band)
			labels,
		}
		segments = append(segments, segment)
	}

	return segments
}
wavPath := resolveBirdaWAVPath(birdaFile, detections[0].WAVPath, cache)
return findWAVFile(dir, baseName)
}
return detections, nil
}
return idx, nil
}

// CallsFromBirda processes BirdNET results files and writes .data files
func CallsFromBirda(input CallsFromBirdaInput) (CallsFromBirdaOutput, error) {
src := birdaSource{}
commonInput := CallsFromSourceInput(input)
}

func (birdaSource) ProcessFile(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
return processBirdaFileCached(birdaFile, cache)
file deletion: calls_detect_anomalies_test.go (----------)

[6.248737]→[6.532511:532565](∅→∅),[6.532565]→[6.529087:529087](∅→∅)

package tools

import (
"os"
"path/filepath"
"testing"
)

// TestDetectAnomalies_LabelMismatch checks that two models labelling the same
// segment with different calltypes are reported as exactly one label mismatch
// and no certainty mismatch.
func TestDetectAnomalies_LabelMismatch(t *testing.T) {
	dir := t.TempDir()

	// Same time range, different calltypes across two models
	data := `[{"Operator":"test"},` +
		`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
		`{"species":"Kiwi","calltype":"Male","certainty":100,"filter":"model-b"}]]]`
	if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
		t.Fatal(err)
	}

	out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
	if err != nil {
		t.Fatal(err)
	}
	if out.LabelMismatches != 1 {
		t.Errorf("expected 1 label mismatch, got %d", out.LabelMismatches)
	}
	if out.CertaintyMismatches != 0 {
		t.Errorf("expected 0 certainty mismatches, got %d", out.CertaintyMismatches)
	}
	// Guard the index below so an empty result fails the test instead of
	// panicking.
	if len(out.Anomalies) == 0 {
		t.Fatal("expected at least 1 anomaly, got none")
	}
	if out.Anomalies[0].Type != "label_mismatch" {
		t.Errorf("expected label_mismatch, got %s", out.Anomalies[0].Type)
	}
}

func TestDetectAnomalies_CertaintyMismatch(t *testing.T) {
dir := t.TempDir()

// Same time range, same labels, different certainty
data := `[{"Operator":"test"},` +
`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":90,"filter":"model-a"},` +
`{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
t.Fatal(err)
}

out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
if err != nil {
t.Fatal(err)
}
if out.CertaintyMismatches != 1 {
t.Errorf("expected 1 certainty mismatch, got %d", out.CertaintyMismatches)
}
if out.LabelMismatches != 0 {
t.Errorf("expected 0 label mismatches, got %d", out.LabelMismatches)
}
}

func TestDetectAnomalies_NoAnomalyWhenAgreement(t *testing.T) {
dir := t.TempDir()

data := `[{"Operator":"test"},` +
`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
`{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
t.Fatal(err)
}

out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
if err != nil {
t.Fatal(err)
}
if out.AnomaliesTotal != 0 {
t.Errorf("expected 0 anomalies, got %d", out.AnomaliesTotal)
}
}

func TestDetectAnomalies_LonelySegmentSkipped(t *testing.T) {
dir := t.TempDir()

// model-a has a segment, model-b has no segment in this file
data := `[{"Operator":"test"},` +
`[0,10,100,1000,[{"species":"Kiwi","certainty":100,"filter":"model-a"}]]]`
if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
t.Fatal(err)
}

out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
if err != nil {
t.Fatal(err)
}
if out.AnomaliesTotal != 0 {
t.Errorf("lonely segment should be skipped, got %d anomalies", out.AnomaliesTotal)
}
if out.FilesWithAllModels != 0 {
t.Errorf("file missing a model should not count as FilesWithAllModels")
}
}

func TestDetectAnomalies_FailsWithOneModel(t *testing.T) {
dir := t.TempDir()
_, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a"}})
if err == nil {
t.Error("expected error with only 1 model")
}
}
file deletion: calls_detect_anomalies.go (----------)

[6.248737]→[6.539374:539423](∅→∅),[6.539423]→[6.532567:532567](∅→∅)

package tools

import (
"fmt"
"os"
"path/filepath"

"skraak/utils"
)

// DetectAnomaliesInput holds the parameters for DetectAnomalies.
type DetectAnomaliesInput struct {
	// Folder is the directory scanned for .data files.
	Folder string
	// Models lists the filter names to compare; at least 2 are required.
	Models []string
	// Species optionally limits the comparison; empty means all species.
	Species []string
}

// DetectAnomaliesOutput is the JSON-serialisable summary returned by
// DetectAnomalies.
type DetectAnomaliesOutput struct {
	// Folder and Models echo the (cleaned) request.
	Folder string   `json:"folder"`
	Models []string `json:"models"`

	// File counters.
	FilesExamined      int `json:"files_examined"`
	FilesWithAllModels int `json:"files_with_all_models"`

	// Anomaly counters; AnomaliesTotal equals len(Anomalies).
	AnomaliesTotal      int `json:"anomalies_total"`
	LabelMismatches     int `json:"label_mismatches"`
	CertaintyMismatches int `json:"certainty_mismatches"`

	// Anomalies lists every finding; omitted from JSON when empty.
	Anomalies []Anomaly `json:"anomalies,omitempty"`
	// Error carries the failure message, if any; omitted from JSON when empty.
	Error string `json:"error,omitempty"`
}

// Anomaly describes one disagreement between models within a single file.
type Anomaly struct {
	// File is the path of the .data file the anomaly was found in.
	File string `json:"file"`
	// Type is either "label_mismatch" or "certainty_mismatch".
	Type string `json:"type"`
	// Segments holds one entry per compared model.
	Segments []AnomalySegment `json:"segments"`
}

// AnomalySegment records what one model reported for a segment that takes part
// in an Anomaly.
type AnomalySegment struct {
	// Model is the filter name that produced the label.
	Model string `json:"model"`
	// Start and End are the segment's time bounds.
	Start float64 `json:"start"`
	End   float64 `json:"end"`
	// Species, CallType and Certainty are copied from the label.
	Species   string `json:"species"`
	CallType  string `json:"calltype,omitempty"`
	Certainty int    `json:"certainty"`
}

// DetectAnomalies compares corresponding segments across multiple ML model filters
// within each .data file. Segments are matched by time overlap (same logic as propagate).
// Lonely segments (no overlap in one or more models) are silently skipped.
// Anomalies are flagged when overlapping segments disagree on species+calltype,
// or when labels match but certainty values differ.
// validateAnomalyInput checks the DetectAnomalies parameters: at least two
// model names, no repeated model name, and Folder must be an existing
// directory. It returns nil when all checks pass.
func validateAnomalyInput(input DetectAnomaliesInput) error {
	models := input.Models
	if len(models) < 2 {
		return fmt.Errorf("at least 2 --model values required")
	}
	// Compare each name only against the ones after it; every unordered pair
	// is still examined.
	for i, name := range models {
		for _, later := range models[i+1:] {
			if name == later {
				return fmt.Errorf("duplicate --model values are not allowed")
			}
		}
	}

	info, statErr := os.Stat(input.Folder)
	switch {
	case statErr != nil:
		return fmt.Errorf("folder not found: %s", input.Folder)
	case !info.IsDir():
		return fmt.Errorf("not a directory: %s", input.Folder)
	}
	return nil
}

// DetectAnomalies compares the labels that several model filters attached to
// the .data files under input.Folder and reports label and certainty
// disagreements.
//
// The input is validated first (see validateAnomalyInput). Files that cannot
// be parsed are skipped and do not count towards FilesExamined. A file counts
// towards FilesWithAllModels when every requested model has at least one label
// in it, whether or not an anomaly is found there.
//
// On failure the returned output carries the message in its Error field and
// the same message is returned as the error.
func DetectAnomalies(input DetectAnomaliesInput) (DetectAnomaliesOutput, error) {
	folder := filepath.Clean(input.Folder)
	output := DetectAnomaliesOutput{
		Folder: folder,
		Models: input.Models,
	}

	// Reject bad input up front; the per-file comparison indexes models[0].
	if err := validateAnomalyInput(input); err != nil {
		output.Error = err.Error()
		return output, err
	}

	files, err := utils.FindDataFiles(folder)
	if err != nil {
		wrapped := fmt.Errorf("list .data files: %w", err)
		output.Error = wrapped.Error()
		return output, wrapped
	}

	scopeSet := make(map[string]bool, len(input.Species))
	for _, s := range input.Species {
		scopeSet[s] = true
	}

	for _, path := range files {
		df, err := utils.ParseDataFile(path)
		if err != nil {
			// Best effort: unreadable files are skipped, not fatal.
			continue
		}
		output.FilesExamined++

		// Decide model coverage here rather than from the anomaly slice:
		// detectAnomaliesInFile also returns nil when nothing disagreed.
		if !fileHasAllModels(df, input.Models) {
			continue
		}
		output.FilesWithAllModels++

		for _, a := range detectAnomaliesInFile(df, path, input.Models, scopeSet) {
			if a.Type == "label_mismatch" {
				output.LabelMismatches++
			} else {
				output.CertaintyMismatches++
			}
			output.Anomalies = append(output.Anomalies, a)
		}
	}
	output.AnomaliesTotal = len(output.Anomalies)
	return output, nil
}

// fileHasAllModels reports whether every name in models appears as the Filter
// of at least one label in df.
func fileHasAllModels(df *utils.DataFile, models []string) bool {
	present := make(map[string]bool, len(models))
	for _, seg := range df.Segments {
		for _, lbl := range seg.Labels {
			present[lbl.Filter] = true
		}
	}
	for _, model := range models {
		if !present[model] {
			return false
		}
	}
	return true
}

// labeledSeg ties a segment to the one label on it whose filter matched a
// requested model.
type labeledSeg struct {
	// seg is the segment carrying the label.
	seg *utils.Segment
	// label is the matching label on seg.
	label *utils.Label
}

// detectAnomaliesInFile returns nil if the file doesn't contain all required models.
func detectAnomaliesInFile(df *utils.DataFile, path string, models []string, scope map[string]bool) []Anomaly {
modelSegs := collectModelSegments(df, models)

// Skip file if any model is entirely absent.
for _, model := range models {
if len(modelSegs[model]) == 0 {
return nil
}
}

var anomalies []Anomaly
for _, anchor := range modelSegs[models[0]] {
if !inScope(anchor, scope) {
continue
}
if matches := findOverlappingMatches(anchor, models, modelSegs); matches == nil {
continue
} else {
group := buildComparisonGroup(anchor, models, matches)
if a := checkGroupAnomaly(group, path, models); a != nil {
anomalies = append(anomalies, *a)
}
}

// collectModelSegments walks every label in df and buckets it, together with
// its segment, under the label's filter name when that name is one of models.
// Models with no matching label have no entry in the result.
func collectModelSegments(df *utils.DataFile, models []string) map[string][]labeledSeg {
	wanted := make(map[string]struct{}, len(models))
	for _, name := range models {
		wanted[name] = struct{}{}
	}

	byModel := make(map[string][]labeledSeg, len(models))
	for _, segment := range df.Segments {
		for _, label := range segment.Labels {
			if _, ok := wanted[label.Filter]; !ok {
				continue
			}
			byModel[label.Filter] = append(byModel[label.Filter], labeledSeg{seg: segment, label: label})
		}
	}
	return byModel
}

// inScope reports whether the anchor's label passes the species scope. An
// empty scope admits everything; otherwise the scope must contain either the
// bare species or "species+calltype".
func inScope(anchor labeledSeg, scope map[string]bool) bool {
	if len(scope) == 0 {
		return true
	}
	species, callType := anchor.label.Species, anchor.label.CallType
	if scope[species] {
		return true
	}
	if callType == "" {
		// The combined key would equal the bare species, already checked.
		return false
	}
	return scope[species+"+"+callType]
}

// findOverlappingMatches returns matches[model] = overlapping segments from that model,
// or nil if any model has no overlap (lonely anchor).
func findOverlappingMatches(anchor labeledSeg, models []string, modelSegs map[string][]labeledSeg) map[string][]labeledSeg {
matches := make(map[string][]labeledSeg, len(models)-1)
for _, model := range models[1:] {
for _, candidate := range modelSegs[model] {
if overlaps(anchor.seg, candidate.seg) {
matches[model] = append(matches[model], candidate)
}
}

// buildComparisonGroup returns the anchor followed by the first matching
// segment of each model after the first, in the order given by models.
func buildComparisonGroup(anchor labeledSeg, models []string, matches map[string][]labeledSeg) []labeledSeg {
	group := make([]labeledSeg, 0, len(models))
	group = append(group, anchor)
	for _, name := range models[1:] {
		first := matches[name][0]
		group = append(group, first)
	}
	return group
}

// checkGroupAnomaly compares every member of group against its first element.
// A species or calltype difference yields a "label_mismatch"; failing that, a
// certainty difference yields a "certainty_mismatch". It returns nil when the
// whole group agrees.
func checkGroupAnomaly(group []labeledSeg, path string, models []string) *Anomaly {
	ref := group[0].label
	certaintyDiffers := false
	for _, other := range group[1:] {
		if other.label.Species != ref.Species || other.label.CallType != ref.CallType {
			// Label disagreement takes precedence over any certainty difference.
			return &Anomaly{File: path, Type: "label_mismatch", Segments: buildAnomalySegs(group, models)}
		}
		if other.label.Certainty != ref.Certainty {
			certaintyDiffers = true
		}
	}
	if !certaintyDiffers {
		return nil
	}
	return &Anomaly{File: path, Type: "certainty_mismatch", Segments: buildAnomalySegs(group, models)}
}

// buildAnomalySegs converts a comparison group into its reportable form. The
// i-th group member is attributed to models[i].
func buildAnomalySegs(group []labeledSeg, models []string) []AnomalySegment {
	result := make([]AnomalySegment, 0, len(group))
	for idx := range group {
		member := group[idx]
		result = append(result, AnomalySegment{
			Model:     models[idx],
			Start:     member.seg.StartTime,
			End:       member.seg.EndTime,
			Species:   member.label.Species,
			CallType:  member.label.CallType,
			Certainty: member.label.Certainty,
		})
	}
	return result
}

// overlaps reports whether the two segments share a stretch of time. Segments
// that merely touch at an endpoint do not overlap.
func overlaps(a, b *utils.Segment) bool {
	if a.StartTime < b.EndTime {
		return b.StartTime < a.EndTime
	}
	return false
}
if len(matches[model]) == 0 {
return nil
}
}
return matches
}
}
return anomalies
}
if err := validateAnomalyInput(input); err != nil {
output.Error = err.Error()
return output, err
}
file deletion: calls_clip_labels_test.go (----------)

[6.248737]→[6.549826:549875](∅→∅),[6.549875]→[6.539425:539425](∅→∅)

package tools

import (
"encoding/csv"
"os"
"path/filepath"
"strings"
"testing"

"skraak/utils"
)

// --- test helpers (test file only) ---

// writeDataFile writes df to dir/name and aborts the test on failure.
func writeDataFile(t *testing.T, dir, name string, df *utils.DataFile) {
	t.Helper()
	target := filepath.Join(dir, name)
	err := df.Write(target)
	if err == nil {
		return
	}
	t.Fatalf("write .data file %s: %v", name, err)
}

// writeMapping stores the given JSON text as dir/mapping.json and aborts the
// test on failure.
func writeMapping(t *testing.T, dir, json string) {
	t.Helper()
	target := filepath.Join(dir, "mapping.json")
	err := os.WriteFile(target, []byte(json), 0644)
	if err == nil {
		return
	}
	t.Fatalf("write mapping.json: %v", err)
}

// parseCSV opens the CSV at path and returns its first record as the header
// and all remaining records as rows. Any I/O or parse error aborts the test.
func parseCSV(t *testing.T, path string) ([]string, [][]string) {
	t.Helper()
	file, openErr := os.Open(path)
	if openErr != nil {
		t.Fatalf("open CSV %s: %v", path, openErr)
	}
	defer file.Close()

	reader := csv.NewReader(file)
	head, headErr := reader.Read()
	if headErr != nil {
		t.Fatalf("read header: %v", headErr)
	}
	body, bodyErr := reader.ReadAll()
	if bodyErr != nil {
		t.Fatalf("read rows: %v", bodyErr)
	}
	return head, body
}

// clipLabels runs CallsClipLabels on dir with the standard test settings
// (5s clips, no overlap, 0.25s minimum label overlap, "full" final clip),
// reading dir/mapping.json and writing dir/clip_labels.csv. Each extra
// function may adjust the input before the call. A returned error aborts the
// test.
func clipLabels(t *testing.T, dir string, extra ...func(*CallsClipLabelsInput)) CallsClipLabelsOutput {
	t.Helper()

	var input CallsClipLabelsInput
	input.Folder = dir
	input.MappingPath = filepath.Join(dir, "mapping.json")
	input.OutputPath = filepath.Join(dir, "clip_labels.csv")
	input.ClipDuration = 5
	input.ClipOverlap = 0
	input.MinLabelOverlap = 0.25
	input.FinalClip = "full"

	for i := range extra {
		extra[i](&input)
	}

	out, err := CallsClipLabels(input)
	if err == nil {
		return out
	}
	t.Fatalf("CallsClipLabels: %v", err)
	return out
}

// --- tests ---

func TestClipLabels_RealClassTrue(t *testing.T) {
dir := t.TempDir()
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 20},
Segments: []*utils.Segment{
{
StartTime: 3, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

out := clipLabels(t, dir)
header, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))

// Header: file, start_time, end_time, Kiwi
if len(header) != 4 || header[3] != "Kiwi" {
t.Fatalf("header = %v, want [..., Kiwi]", header)
}

// Clip 0-5 overlaps segment 3-8 by 2s ≥ 0.25 → Kiwi=True
// Clip 5-10 overlaps segment 3-8 by 3s ≥ 0.25 → Kiwi=True
// Clip 10-15, 15-20 → Kiwi=False
kiwiCol := 3
for i, row := range rows {
switch row[1] {
case "0.0", "5.0":
if row[kiwiCol] != "True" {
t.Errorf("row %d (start=%s): Kiwi=%s, want True", i, row[1], row[kiwiCol])
}
case "10.0", "15.0":
if row[kiwiCol] != "False" {
t.Errorf("row %d (start=%s): Kiwi=%s, want False", i, row[1], row[kiwiCol])
}
}
}
if out.PerClassTrueCount["Kiwi"] != 2 {
t.Errorf("PerClassTrueCount[Kiwi] = %d, want 2", out.PerClassTrueCount["Kiwi"])
}
}

func TestClipLabels_GapClipsAllFalse(t *testing.T) {
dir := t.TempDir()
// 15s file, Kiwi segment 0-5 only → clips 5-10 and 10-15 are gaps
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 15},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

out := clipLabels(t, dir)
if out.ClipsAllFalseGap != 2 {
t.Errorf("ClipsAllFalseGap = %d, want 2", out.ClipsAllFalseGap)
}
if out.PerClassTrueCount["Kiwi"] != 1 {
t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
}
if out.RowsWritten != 3 {
t.Errorf("RowsWritten = %d, want 3", out.RowsWritten)
}
}

// TestClipLabels_NegativeOverridesPositive verifies that a clip overlapping
// both a real class and a __NEGATIVE__ segment is emitted with every class
// column False, while a clip overlapping only the real class stays True.
func TestClipLabels_NegativeOverridesPositive(t *testing.T) {
	dir := t.TempDir()
	// Kiwi segment 0-8, Not segment 0-4 → clip 0-5 overlaps both → __NEGATIVE__ wins
	// Clip 5-10 overlaps only Kiwi (3s) → True
	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 10},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
			{
				StartTime: 0, EndTime: 4, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Not", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)

	out := clipLabels(t, dir)
	if out.ClipsNegative != 1 {
		t.Errorf("ClipsNegative = %d, want 1", out.ClipsNegative)
	}

	_, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	// Fail cleanly instead of panicking on the indexing below when the CSV
	// has fewer rows or columns than this test expects.
	if len(rows) < 2 {
		t.Fatalf("expected at least 2 CSV rows, got %d", len(rows))
	}
	for i := 0; i < 2; i++ {
		if len(rows[i]) < 4 {
			t.Fatalf("row %d has %d columns, want at least 4", i, len(rows[i]))
		}
	}
	// Clip 0-5: negative hit → all-False (Not overlaps 0-4 by 4s)
	if rows[0][3] != "False" {
		t.Errorf("clip 0-5 Kiwi = %s, want False (overridden by __NEGATIVE__)", rows[0][3])
	}
	// Clip 5-10: only Kiwi overlaps (3s) → True
	if rows[1][3] != "True" {
		t.Errorf("clip 5-10 Kiwi = %s, want True", rows[1][3])
	}
}

func TestClipLabels_IgnoreExcludesClip(t *testing.T) {
dir := t.TempDir()
// Don't Know segment 0-5, Kiwi segment 6-10
// Clip 0-5 overlaps __IGNORE__ → excluded
// Clip 5-10 overlaps Kiwi → emitted with True
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 15},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Don't Know", Certainty: 0, Filter: "f1"}},
},
{
StartTime: 6, EndTime: 10, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Don't Know":{"species":"__IGNORE__"}}`)

out := clipLabels(t, dir)
if out.ClipsIgnored != 1 {
t.Errorf("ClipsIgnored = %d, want 1", out.ClipsIgnored)
}
if out.SegmentsIgnored != 1 {
t.Errorf("SegmentsIgnored = %d, want 1", out.SegmentsIgnored)
}
// Only 2 rows: clip 5-10 (Kiwi=True) and clip 10-15 (gap)
if out.RowsWritten != 2 {
t.Errorf("RowsWritten = %d, want 2", out.RowsWritten)
}
}

func TestClipLabels_FilterRestrictsLabels(t *testing.T) {
dir := t.TempDir()
// Same time range, two filters. Only "wanted" should contribute.
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 10},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{
{Species: "Kiwi", Certainty: 100, Filter: "wanted"},
{Species: "Not", Certainty: 100, Filter: "unwanted"},
},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)

out := clipLabels(t, dir, func(in *CallsClipLabelsInput) { in.Filter = "wanted" })
// Only Kiwi from "wanted" filter → clip 0-5 should be Kiwi=True
// Not from "unwanted" filter should be ignored → no __NEGATIVE__ override
if out.ClipsNegative != 0 {
t.Errorf("ClipsNegative = %d, want 0 (Not filter excluded)", out.ClipsNegative)
}
if out.PerClassTrueCount["Kiwi"] != 1 {
t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
}
}

func TestClipLabels_MappingCoverageError(t *testing.T) {
dir := t.TempDir()
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 10},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Mystery", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

input := CallsClipLabelsInput{
Folder: dir,
MappingPath: filepath.Join(dir, "mapping.json"),
OutputPath: filepath.Join(dir, "clip_labels.csv"),
ClipDuration: 5,
ClipOverlap: 0,
MinLabelOverlap: 0.25,
FinalClip: "full",
}
_, err := CallsClipLabels(input)
if err == nil {
t.Fatal("expected error for missing species in mapping")
}
if !strings.Contains(err.Error(), "Mystery") {
t.Errorf("error should mention missing species, got: %v", err)
}
}

// TestClipLabels_AppendMode verifies that re-running the export against an
// existing output CSV with the same input folder fails with a duplicate error.
func TestClipLabels_AppendMode(t *testing.T) {
	dir := t.TempDir()
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

	// First run: a single 5s file produces one row.
	kiwi := &utils.Segment{
		StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
		Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
	}
	writeDataFile(t, dir, "a.wav.data", &utils.DataFile{
		Meta:     &utils.DataMeta{Duration: 5},
		Segments: []*utils.Segment{kiwi},
	})
	first := clipLabels(t, dir)
	if first.RowsWritten != 1 {
		t.Fatalf("first run: RowsWritten = %d, want 1", first.RowsWritten)
	}

	// Second run: same folder and same output file, so the clip generated
	// again is already present in the CSV and must be rejected.
	var again CallsClipLabelsInput
	again.Folder = dir
	again.MappingPath = filepath.Join(dir, "mapping.json")
	again.OutputPath = filepath.Join(dir, "clip_labels.csv")
	again.ClipDuration = 5
	again.ClipOverlap = 0
	again.MinLabelOverlap = 0.25
	again.FinalClip = "full"

	_, err := CallsClipLabels(again)
	switch {
	case err == nil:
		t.Fatal("expected duplicate error on second run with same folder")
	case !strings.Contains(err.Error(), "duplicate"):
		t.Errorf("error should mention duplicate, got: %v", err)
	}
}

// TestClipLabels_MultipleFiles verifies that every .data file in the folder
// contributes its own clips to a single CSV.
func TestClipLabels_MultipleFiles(t *testing.T) {
	dir := t.TempDir()
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

	// kiwiSegment builds a fresh 0-5s Kiwi segment for each file.
	kiwiSegment := func() *utils.Segment {
		return &utils.Segment{
			StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
			Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
		}
	}
	writeDataFile(t, dir, "a.wav.data", &utils.DataFile{
		Meta:     &utils.DataMeta{Duration: 10},
		Segments: []*utils.Segment{kiwiSegment()},
	})
	writeDataFile(t, dir, "b.wav.data", &utils.DataFile{
		Meta:     &utils.DataMeta{Duration: 5},
		Segments: []*utils.Segment{kiwiSegment()},
	})

	out := clipLabels(t, dir)
	if got := out.DataFilesParsed; got != 2 {
		t.Errorf("DataFilesParsed = %d, want 2", got)
	}
	// File a yields clips 0-5 and 5-10, file b yields clip 0-5: 3 rows.
	if got := out.RowsWritten; got != 3 {
		t.Errorf("RowsWritten = %d, want 3", got)
	}

	_, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	perFile := make(map[string]int)
	for i := range rows {
		perFile[rows[i][0]]++
	}
	if got := len(perFile); got != 2 {
		t.Errorf("expected 2 distinct files in CSV, got %d", got)
	}
}
file deletion: calls_clip_labels.go (----------)

[6.248737]→[6.563754:563798](∅→∅),[6.563798]→[6.549877:549877](∅→∅)

package tools

import (
"encoding/csv"
"fmt"
"io"
"os"
"path/filepath"
"slices"
"sort"
"strconv"
"strings"

"skraak/utils"
)

// CallsClipLabelsInput holds the settings for one CallsClipLabels run.
type CallsClipLabelsInput struct {
	// Folder is scanned for .data files.
	Folder string `json:"folder"`
	// MappingPath is the mapping file to load.
	MappingPath string `json:"mapping"`
	// Filter, when non-empty, restricts processing to labels with that filter.
	Filter string `json:"filter,omitempty"`
	// OutputPath is the CSV file to create or append to.
	OutputPath string `json:"output"`
	// ClipDuration must be > 0.
	ClipDuration float64 `json:"clip_duration"`
	// ClipOverlap must lie in [0, ClipDuration).
	ClipOverlap float64 `json:"clip_overlap"`
	// MinLabelOverlap must be > 0.
	MinLabelOverlap float64 `json:"min_label_overlap"`
	// FinalClip is parsed by utils.ParseFinalClipMode (e.g. "full").
	FinalClip string `json:"final_clip"`
}

// CallsClipLabelsOutput is the JSON-serialisable summary of one
// CallsClipLabels run.
type CallsClipLabelsOutput struct {
	// Folder, OutputPath and Filter echo the request.
	Folder     string `json:"folder"`
	OutputPath string `json:"output"`
	Filter     string `json:"filter,omitempty"`
	// Classes lists the class columns taken from the mapping.
	Classes []string `json:"classes"`
	// DataFilesParsed is the number of .data files processed.
	DataFilesParsed int `json:"data_files_parsed"`
	// ClipsNegative counts emitted clips that are all-False because of __NEGATIVE__.
	ClipsNegative int `json:"clips_negative"`
	// ClipsIgnored counts clips excluded from output because of __IGNORE__ overlap.
	ClipsIgnored int `json:"clips_ignored"`
	// SegmentsIgnored counts segments whose species maps to __IGNORE__.
	SegmentsIgnored int `json:"segments_ignored"`
	// ClipsAllFalseGap counts emitted clips that are all-False because nothing overlaps.
	ClipsAllFalseGap int `json:"clips_all_false_gap"`
	// PerClassTrueCount maps each class to the number of clips marked True.
	PerClassTrueCount map[string]int `json:"per_class_true_count"`
	// AppendedToFile is true when rows were added to an existing CSV.
	AppendedToFile bool `json:"appended_to_file"`
	// ExistingRowsFound is the number of rows already in that CSV.
	ExistingRowsFound int `json:"existing_rows_found"`
	// RowsWritten is the number of rows written by this run.
	RowsWritten int `json:"rows_written"`
}

// resolvedSeg is a segment after classification through the mapping, reduced
// to what is needed for overlap checks against clip windows.
type resolvedSeg struct {
	// start and end are the segment's time bounds.
	start float64
	end   float64
	// kind is the mapping outcome for the segment's label.
	kind utils.MappingKind
	// classIdx is the class column; valid only when kind == utils.MappingReal.
	classIdx int
}

// clipDisposition is the outcome decided for one clip window.
type clipDisposition int

// The possible clip outcomes, numbered from 0 in declaration order.
const (
	// dispoLabelled: at least one class column is True.
	dispoLabelled clipDisposition = iota
	// dispoNegative: a __NEGATIVE__ hit, all class columns False.
	dispoNegative
	// dispoGap: no segment overlaps, all class columns False.
	dispoGap
	// dispoIgnored: an __IGNORE__ hit, clip excluded from output.
	dispoIgnored
)

// clipLabelsRow is a single output CSV row before formatting.
type clipLabelsRow struct {
	// file is the value written to the "file" column.
	file string
	// start and end are the clip window bounds.
	start float64
	end   float64
	// flags holds one True/False value per class column.
	flags []bool
}

// rowKey identifies a CSV row for duplicate detection by its first three
// columns, held as the strings that appear in the file.
type rowKey struct {
	// file is the "file" column.
	file string
	// start is the "start_time" column.
	start string
	// end is the "end_time" column.
	end string
}

// CallsClipLabels reads .data files from a single folder and writes a CSV in
// OpenSoundScape's clip_labels format: one row per clip per file, with one
// True/False column per class in the mapping.
//
// Mirrors BoxedAnnotations.clip_labels(): every clip window is emitted; a
// column is True when any annotation of that class overlaps the window by
// ≥ min_label_overlap seconds. Sentinel mappings (__NEGATIVE__, __IGNORE__)
// get no column and contribute no labels.
// parsedClipFile pairs a .data file's path with its parsed contents for
// clip-labels processing.
type parsedClipFile struct {
	// path is the .data file's location.
	path string
	// df is the parsed file.
	df *utils.DataFile
}

// validateClipLabelsInput validates the input parameters and returns the parsed finalClipMode.
func validateClipLabelsInput(input CallsClipLabelsInput) (utils.FinalClipMode, error) {
finalClipMode, err := utils.ParseFinalClipMode(input.FinalClip)
if err != nil {
return 0, err
}
if input.ClipDuration <= 0 {
return 0, fmt.Errorf("--clip-duration must be > 0, got %v", input.ClipDuration)
}
if input.ClipOverlap < 0 || input.ClipOverlap >= input.ClipDuration {
return 0, fmt.Errorf("--clip-overlap must be in [0, clip-duration), got %v", input.ClipOverlap)
}
if input.MinLabelOverlap <= 0 {
return 0, fmt.Errorf("--min-label-overlap must be > 0, got %v", input.MinLabelOverlap)
}

// parseClipLabelsDataFiles finds and parses every .data file in folder and
// checks that mapping covers each species seen.
//
// Only labels whose Filter equals filter are considered when filter is
// non-empty. It returns an error when the folder cannot be scanned, contains
// no .data files, a file fails to parse, a file has no positive Duration, or
// the mapping lacks an entry for a seen species.
func parseClipLabelsDataFiles(folder, filter string, mapping utils.MappingFile) ([]parsedClipFile, error) {
	dataPaths, err := utils.FindDataFiles(folder)
	if err != nil {
		return nil, fmt.Errorf("scan folder %s: %w", folder, err)
	}
	if len(dataPaths) == 0 {
		return nil, fmt.Errorf("no .data files found in %s", folder)
	}

	// One entry per input path; any failure below aborts the whole run.
	parsed := make([]parsedClipFile, 0, len(dataPaths))
	speciesSeen := map[string]bool{}
	for _, p := range dataPaths {
		df, err := utils.ParseDataFile(p)
		if err != nil {
			return nil, fmt.Errorf("parse %s: %w", p, err)
		}
		// Clip windows are generated from the duration, so it must be known.
		if df.Meta == nil || df.Meta.Duration <= 0 {
			return nil, fmt.Errorf("missing or non-positive Duration in %s (cannot generate clips)", p)
		}
		for _, seg := range df.Segments {
			for _, lbl := range seg.Labels {
				if filter != "" && lbl.Filter != filter {
					continue
				}
				speciesSeen[lbl.Species] = true
			}
		}
		parsed = append(parsed, parsedClipFile{path: p, df: df})
	}

	if missing := mapping.ValidateCoversSpecies(speciesSeen); len(missing) > 0 {
		return nil, fmt.Errorf("mapping.json is missing entries for species: %s\n(run /data-mapping to regenerate)", strings.Join(missing, ", "))
	}
	return parsed, nil
}

// dedupClipLabelsRows checks for duplicate rows within new rows and against existing CSV rows.
func dedupClipLabelsRows(rows []clipLabelsRow, existing map[rowKey]bool) error {
dedup := make(map[rowKey]bool, len(existing)+len(rows))
for k := range existing {
dedup[k] = true
}

// CallsClipLabels reads the .data files in input.Folder and writes one CSV row
// per clip window per file to input.OutputPath, with a True/False column for
// each class in the mapping.
//
// When the output file already exists with a matching header, new rows are
// appended to it; a clip that duplicates another (same file, start and end)
// is an error. The returned summary is only partly filled in when an error is
// returned.
func CallsClipLabels(input CallsClipLabelsInput) (CallsClipLabelsOutput, error) {
	summary := CallsClipLabelsOutput{
		Folder:            input.Folder,
		OutputPath:        input.OutputPath,
		PerClassTrueCount: map[string]int{},
	}

	clipMode, err := validateClipLabelsInput(input)
	if err != nil {
		return summary, err
	}

	mapping, err := utils.LoadMappingFile(input.MappingPath)
	if err != nil {
		return summary, fmt.Errorf("load mapping %s: %w", input.MappingPath, err)
	}

	classes := mapping.Classes()
	if len(classes) == 0 {
		return summary, fmt.Errorf("mapping.json has no real (non-sentinel) classes")
	}
	summary.Classes, summary.Filter = classes, input.Filter

	// Column position of each class within the flag part of a row.
	columnOf := make(map[string]int, len(classes))
	for col, name := range classes {
		columnOf[name] = col
	}

	files, err := parseClipLabelsDataFiles(input.Folder, input.Filter, mapping)
	if err != nil {
		return summary, err
	}
	summary.DataFilesParsed = len(files)

	header := make([]string, 0, 3+len(classes))
	header = append(header, "file", "start_time", "end_time")
	header = append(header, classes...)

	known, appending, err := loadExistingRows(input.OutputPath, header)
	if err != nil {
		return summary, err
	}
	summary.AppendedToFile = appending
	summary.ExistingRowsFound = len(known)

	cwd, err := os.Getwd()
	if err != nil {
		return summary, fmt.Errorf("getwd: %w", err)
	}
	folderAbs, err := filepath.Abs(input.Folder)
	if err != nil {
		return summary, fmt.Errorf("abs %s: %w", input.Folder, err)
	}

	rows := make([]clipLabelsRow, 0, 1024)
	for i := range files {
		fileRows, err := processClipLabelsFile(files[i].path, files[i].df, mapping, columnOf, classes, input, clipMode, cwd, folderAbs, &summary)
		if err != nil {
			return summary, err
		}
		rows = append(rows, fileRows...)
	}

	if err = dedupClipLabelsRows(rows, known); err != nil {
		return summary, err
	}
	if err = writeRows(input.OutputPath, header, rows, appending); err != nil {
		return summary, err
	}
	summary.RowsWritten = len(rows)

	// Sorts in place; summary.Classes shares its backing array with classes.
	sort.Strings(summary.Classes)
	return summary, nil
}

// processClipLabelsFile generates clip-labels rows for a single .data file.
func processClipLabelsFile(
path string,
df *utils.DataFile,
mapping utils.MappingFile,
classIdx map[string]int,
classes []string,
input CallsClipLabelsInput,
finalClipMode utils.FinalClipMode,
cwd, folderAbs string,
out *CallsClipLabelsOutput,
) ([]clipLabelsRow, error) {
windows, err := utils.GenerateClipTimes(
df.Meta.Duration,
input.ClipDuration,
input.ClipOverlap,
finalClipMode,
10,
)
if err != nil {
return nil, fmt.Errorf("generate clip windows for %s: %w", path, err)
}
if len(windows) == 0 {
return nil, nil
}

// resolveSegments maps segments to their classification and filters out mismatches.
func resolveSegments(
segments []*utils.Segment,
filter string,
minLabelOverlap float64,
mapping utils.MappingFile,
classIdx map[string]int,
out *CallsClipLabelsOutput,
) []resolvedSeg {
segs := make([]resolvedSeg, 0, len(segments))
for _, seg := range segments {
if seg.EndTime-seg.StartTime < minLabelOverlap {
continue
}
for _, lbl := range seg.Labels {
if filter != "" && lbl.Filter != filter {
continue
}
canon, kind, ok := mapping.Classify(lbl.Species)
if !ok {
continue
}
switch kind {
case utils.MappingIgn:
out.SegmentsIgnored++
segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind})
case utils.MappingNeg:
segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind})
case utils.MappingReal:
idx, present := classIdx[canon]
if !present {
continue
}
segs = append(segs, resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind, classIdx: idx})
}
}
}

// computeWavRelPath computes the relative path from cwd to the WAV file corresponding to a .data file.
func computeWavRelPath(dataPath, cwd, folderAbs string) (string, error) {
wavName := strings.TrimSuffix(filepath.Base(dataPath), ".data")
wavAbs := filepath.Join(folderAbs, wavName)
rel, err := filepath.Rel(cwd, wavAbs)
if err != nil {
rel = wavAbs
}
// Ensure relative paths start with ./ to match OPSO / pandas convention.
if rel != "" && !filepath.IsAbs(rel) && !strings.HasPrefix(rel, "."+string(filepath.Separator)) {
rel = "." + string(filepath.Separator) + rel
}

// labelClipWindows classifies every window against segs and returns one row
// per window that is not ignored, updating the counters in out as it goes.
// It returns nil when no row is produced.
func labelClipWindows(windows []utils.ClipWindow, segs []resolvedSeg, rel string, classes []string, minLabelOverlap float64, out *CallsClipLabelsOutput) []clipLabelsRow {
	var result []clipLabelsRow
	for _, win := range windows {
		kind, hits := classifyClip(win, segs, minLabelOverlap, len(classes))
		if kind == dispoIgnored {
			// Ignored clips are counted but never written.
			out.ClipsIgnored++
			continue
		}

		flags := make([]bool, len(classes))
		switch kind {
		case dispoLabelled:
			for idx := range hits {
				if !hits[idx] {
					continue
				}
				flags[idx] = true
				out.PerClassTrueCount[classes[idx]]++
			}
		case dispoGap:
			out.ClipsAllFalseGap++
		case dispoNegative:
			out.ClipsNegative++
		}

		result = append(result, clipLabelsRow{file: rel, start: win.Start, end: win.End, flags: flags})
	}
	return result
}

// classifyClip decides the outcome for one clip window. Segments overlapping
// the window by less than minLabelOverlap are disregarded. Among the rest an
// __IGNORE__ segment wins over a __NEGATIVE__ one, which wins over real class
// hits; with no hit at all the clip is a gap. The returned slice has one entry
// per class and is nil for an ignored clip.
func classifyClip(w utils.ClipWindow, segs []resolvedSeg, minLabelOverlap float64, nClasses int) (clipDisposition, []bool) {
	var sawIgnore, sawNegative bool
	hits := make([]bool, nClasses)

	for i := range segs {
		candidate := segs[i]
		if overlapSeconds(candidate.start, candidate.end, w.Start, w.End) < minLabelOverlap {
			continue
		}
		switch candidate.kind {
		case utils.MappingReal:
			hits[candidate.classIdx] = true
		case utils.MappingNeg:
			sawNegative = true
		case utils.MappingIgn:
			sawIgnore = true
		}
	}

	switch {
	case sawIgnore:
		return dispoIgnored, nil
	case sawNegative:
		return dispoNegative, hits
	case slices.Contains(hits, true):
		return dispoLabelled, hits
	default:
		return dispoGap, hits
	}
}

// loadExistingRows inspects an existing output CSV. It returns the keys of the
// rows already present (for duplicate detection) and whether the caller should
// append. A missing or empty file yields (nil, false, nil). A header differing
// from expectedHeader, or a row with fewer than three fields, is an error.
func loadExistingRows(outputPath string, expectedHeader []string) (map[rowKey]bool, bool, error) {
	info, statErr := os.Stat(outputPath)
	switch {
	case statErr == nil:
		// File exists; continue with the size check.
	case os.IsNotExist(statErr):
		return nil, false, nil
	default:
		return nil, false, fmt.Errorf("stat %s: %w", outputPath, statErr)
	}
	if info.Size() == 0 {
		return nil, false, nil
	}

	file, err := os.Open(outputPath)
	if err != nil {
		return nil, false, fmt.Errorf("open existing %s: %w", outputPath, err)
	}
	defer func() { _ = file.Close() }()

	reader := csv.NewReader(file)
	// Accept any field count per record; short rows are rejected explicitly below.
	reader.FieldsPerRecord = -1

	gotHeader, err := reader.Read()
	if err != nil {
		return nil, false, fmt.Errorf("read header of existing %s: %w", outputPath, err)
	}
	if !slices.Equal(gotHeader, expectedHeader) {
		return nil, false, fmt.Errorf("column-set mismatch in existing %s\n existing: %s\n new: %s",
			outputPath, strings.Join(gotHeader, ","), strings.Join(expectedHeader, ","))
	}

	seen := make(map[rowKey]bool)
	for {
		record, readErr := reader.Read()
		if readErr == io.EOF {
			return seen, true, nil
		}
		if readErr != nil {
			return nil, false, fmt.Errorf("read row of existing %s: %w", outputPath, readErr)
		}
		if len(record) < 3 {
			return nil, false, fmt.Errorf("malformed row in existing %s: %v", outputPath, record)
		}
		seen[rowKey{file: record[0], start: record[1], end: record[2]}] = true
	}
}

// overlapSeconds returns the length of the intersection of the intervals
// [aStart, aEnd) and [bStart, bEnd), or 0 when they do not intersect.
func overlapSeconds(aStart, aEnd, bStart, bEnd float64) float64 {
	from, to := max(aStart, bStart), min(aEnd, bEnd)
	if to <= from {
		return 0
	}
	return to - from
}

// formatTime renders v in its shortest exact decimal form and guarantees a
// decimal point, mirroring pandas' default float output in to_csv:
// 5 → "5.0", 5.5 → "5.5", 3.5001250000 → "3.500125".
func formatTime(v float64) string {
	text := strconv.FormatFloat(v, 'f', -1, 64)
	if strings.IndexByte(text, '.') >= 0 {
		return text
	}
	return text + ".0"
}

// writeRows writes rows to the CSV file at path.
//
// In append mode the file is opened for appending and no header is written;
// otherwise the file is created or truncated and header is written first.
// Each flag is rendered as "True" or "False". Errors from opening, writing,
// flushing and closing the file are all reported.
func writeRows(path string, header []string, rows []clipLabelsRow, appendMode bool) (err error) {
	openFlags := os.O_CREATE | os.O_TRUNC | os.O_WRONLY
	if appendMode {
		openFlags = os.O_APPEND | os.O_WRONLY
	}
	f, err := os.OpenFile(path, openFlags, 0644)
	if err != nil {
		return fmt.Errorf("open %s for write: %w", path, err)
	}
	// A failed Close on a written file can mean lost data, so surface it
	// unless an earlier error is already being returned.
	defer func() {
		if cerr := f.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("close %s: %w", path, cerr)
		}
	}()

	w := csv.NewWriter(f)
	if !appendMode {
		if err := w.Write(header); err != nil {
			return fmt.Errorf("write header: %w", err)
		}
	}

	if len(rows) == 0 {
		w.Flush()
		return w.Error()
	}

	// One record buffer is reused for every row; it is sized from the first
	// row's flag count.
	rec := make([]string, 3+len(rows[0].flags))
	for _, r := range rows {
		rec[0] = r.file
		rec[1] = formatTime(r.start)
		rec[2] = formatTime(r.end)
		for i, b := range r.flags {
			if b {
				rec[3+i] = "True"
			} else {
				rec[3+i] = "False"
			}
		}
		if err := w.Write(rec); err != nil {
			return fmt.Errorf("write row: %w", err)
		}
	}
	w.Flush()
	return w.Error()
}
return rel, nil
}
return segs
}

return labelClipWindows(windows, segs, rel, classes, input.MinLabelOverlap, out), nil
}
}

segs := resolveSegments(df.Segments, input.Filter, input.MinLabelOverlap, mapping, classIdx, out)

rel, err := computeWavRelPath(path, cwd, folderAbs)
if err != nil {
return nil, err
for _, r := range rows {
k := rowKey{file: r.file, start: formatTime(r.start), end: formatTime(r.end)}
if dedup[k] {
return fmt.Errorf("duplicate clip detected: file=%s start=%s end=%s", k.file, k.start, k.end)
}
dedup[k] = true
}
return nil
}
parsed := make([]parsedClipFile, 0, len(dataPaths))
return finalClipMode, nil
}
file deletion: calls_clip_bench_test.go (----------)

[6.248737]→[6.574044:574092](∅→∅),[6.574092]→[6.563800:563800](∅→∅)

package tools

import (
"encoding/binary"
"math"
"os"
"testing"

"skraak/utils"
)

// benchWAV is the WAV recording read by the benchmarks in this file. The path
// is relative, so the file has to be present for them to run.
const benchWAV = "../audio/20211028_211500.WAV"

// ==================== WAV I/O ====================

func BenchmarkReadWAV(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_, _, err := utils.ReadWAVSamples(benchWAV)
if err != nil {
b.Fatal(err)
}
}
}

func BenchmarkConvertToFloat64_16bit(b *testing.B) {
// Simulate 16-bit mono WAV data (same size as test file: 14.32M samples)
numSamples := 14320000
data := make([]byte, numSamples*2)
for i := range numSamples {
binary.LittleEndian.PutUint16(data[i*2:], uint16(i%65536))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = convertToFloat64Bench(data, 16, 1)
}
}

// convertToFloat64Bench is a benchmarking copy of the unexported
// convertToFloat64 in utils. For every frame of bitsPerSample/8*channels
// bytes it reads the first two bytes as a little-endian signed 16-bit value
// and scales it by 1/32768.
func convertToFloat64Bench(data []byte, bitsPerSample, channels int) []float64 {
	frameBytes := (bitsPerSample / 8) * channels
	out := make([]float64, len(data)/frameBytes)
	for idx := range out {
		pos := idx * frameBytes
		raw := binary.LittleEndian.Uint16(data[pos : pos+2])
		out[idx] = float64(int16(raw)) / 32768.0
	}
	return out
}

// BenchmarkWriteWAV times utils.WriteWAVFile on the 872–895 s segment of the
// benchmark recording. Every iteration writes to a fresh temp file and then
// removes it. Failures while reading the input, creating the temp file or
// writing the WAV stop the benchmark rather than being ignored.
func BenchmarkWriteWAV(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	b.Logf("segment samples=%d", len(segSamples))
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		f, err := os.CreateTemp("", "bench_*.wav")
		if err != nil {
			b.Fatalf("create temp file: %v", err)
		}
		name := f.Name()
		writeErr := utils.WriteWAVFile(name, segSamples, sr)
		// Cleanup is best-effort; only the write result matters here.
		_ = f.Close()
		_ = os.Remove(name)
		if writeErr != nil {
			b.Fatalf("write WAV: %v", writeErr)
		}
	}
}

// ==================== Resample ====================

// BenchmarkResampleRate_48k times utils.ResampleRate over the whole benchmark
// recording, treated as 48000 Hz input resampled to 16000 Hz. A failed read
// aborts the benchmark so it cannot silently time an empty input.
func BenchmarkResampleRate_48k(b *testing.B) {
	samples, _, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	b.Logf("resampling %d samples 48000->16000", len(samples))
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		utils.ResampleRate(samples, 48000, 16000)
	}
}

// BenchmarkResampleRate_250k times utils.ResampleRate over the whole
// benchmark recording, treated as 250000 Hz input resampled to 16000 Hz. A
// failed read aborts the benchmark so it cannot silently time an empty input.
func BenchmarkResampleRate_250k(b *testing.B) {
	samples, _, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	b.Logf("resampling %d samples 250000->16000", len(samples))
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		utils.ResampleRate(samples, 250000, 16000)
	}
}

// ==================== Spectrogram pipeline ====================

// BenchmarkExtractSegment times utils.ExtractSegmentSamples for the 872–895 s
// window of the benchmark recording. A failed read aborts the benchmark with
// the read error instead of the later, less specific "empty segment".
func BenchmarkExtractSegment(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	b.Logf("full file: %d samples, sr=%d", len(samples), sr)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		seg := utils.ExtractSegmentSamples(samples, sr, 872, 895)
		if len(seg) == 0 {
			b.Fatal("empty segment")
		}
	}
}

// BenchmarkPowerSpectrumFFT_512 times one 512-point frame: applying a Hann
// window to the first 512 samples of the 872–895 s segment, then calling
// utils.PowerSpectrumFFT with preallocated output and scratch buffers.
//
// The benchmark aborts if the recording cannot be read or the segment is
// shorter than one frame; otherwise indexing the segment would panic.
func BenchmarkPowerSpectrumFFT_512(b *testing.B) {
	n := 512
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	if len(segSamples) < n {
		b.Fatalf("segment has %d samples, need at least %d", len(segSamples), n)
	}
	frameData := make([]float64, n)
	power := make([]float64, n/2+1)
	scratch := make([]complex128, n)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		// Simulate the windowing step (Hann) + FFT
		for j := range n {
			frameData[j] = segSamples[j] * 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(j)/float64(n-1)))
		}
		utils.PowerSpectrumFFT(frameData, power, scratch)
	}
}

// BenchmarkSpectrogram_23s times utils.GenerateSpectrogram on the 872–895 s
// segment of the benchmark recording with the default 16000 config. A failed
// read aborts the benchmark with the read error.
func BenchmarkSpectrogram_23s(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	b.Logf("segment samples=%d, windowSize=%d, hopSize=%d", len(segSamples), cfg.WindowSize, cfg.HopSize)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		if spect == nil {
			b.Fatal("nil spectrogram")
		}
	}
}

// BenchmarkSpectrogram_60s times utils.GenerateSpectrogram on the first 60 s
// of the benchmark recording with the default 16000 config. A failed read
// aborts the benchmark with the read error.
func BenchmarkSpectrogram_60s(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 0, 60)
	cfg := utils.DefaultSpectrogramConfig(16000)
	b.Logf("60s segment samples=%d", len(segSamples))
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		if spect == nil {
			b.Fatal("nil spectrogram")
		}
	}
}

// ==================== Image creation & resize ====================

// BenchmarkCreateGrayscaleImage times utils.CreateGrayscaleImage on the
// spectrogram of the 872–895 s segment. The spectrogram is built once during
// setup; a failed read aborts the benchmark with the read error.
func BenchmarkCreateGrayscaleImage(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		img := utils.CreateGrayscaleImage(spect)
		if img == nil {
			b.Fatal("nil image")
		}
	}
}

// BenchmarkCreateRGBImage times the colour path for the 872–895 s segment:
// utils.ApplyL4Colormap followed by utils.CreateRGBImage. The spectrogram is
// built once during setup; a failed read aborts the benchmark with the read
// error.
func BenchmarkCreateRGBImage(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		colorData := utils.ApplyL4Colormap(spect)
		img := utils.CreateRGBImage(colorData)
		if img == nil {
			b.Fatal("nil image")
		}
	}
}

// BenchmarkApplyL4Colormap times utils.ApplyL4Colormap on the spectrogram of
// the 872–895 s segment. The spectrogram is built once during setup; a failed
// read aborts the benchmark with the read error.
func BenchmarkApplyL4Colormap(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		colorData := utils.ApplyL4Colormap(spect)
		if colorData == nil {
			b.Fatal("nil colormap")
		}
	}
}

// BenchmarkResizeGray224 times utils.ResizeImage scaling the grayscale
// spectrogram image of the 872–895 s segment to 224x224. The image is built
// once during setup; a failed read aborts the benchmark with the read error.
func BenchmarkResizeGray224(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	img := utils.CreateGrayscaleImage(spect)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		resized := utils.ResizeImage(img, 224, 224)
		if resized == nil {
			b.Fatal("nil resize")
		}
	}
}

// BenchmarkResizeGray448 times utils.ResizeImage scaling the grayscale
// spectrogram image of the 872–895 s segment to 448x448. The image is built
// once during setup; a failed read aborts the benchmark with the read error.
func BenchmarkResizeGray448(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	img := utils.CreateGrayscaleImage(spect)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		resized := utils.ResizeImage(img, 448, 448)
		if resized == nil {
			b.Fatal("nil resize")
		}
	}
}

// ==================== PNG write ====================

// BenchmarkWritePNG_224 times utils.WritePNG for the 224x224 grayscale
// spectrogram image of the 872–895 s segment. Every iteration encodes into a
// fresh temp file and then removes it. Failures while reading the input,
// creating the temp file or encoding stop the benchmark rather than being
// ignored.
func BenchmarkWritePNG_224(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	img := utils.CreateGrayscaleImage(spect)
	resized := utils.ResizeImage(img, 224, 224)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		f, err := os.CreateTemp("", "bench_*.png")
		if err != nil {
			b.Fatalf("create temp file: %v", err)
		}
		writeErr := utils.WritePNG(resized, f)
		// Cleanup is best-effort; only the encode result matters here.
		_ = f.Close()
		_ = os.Remove(f.Name())
		if writeErr != nil {
			b.Fatalf("write PNG: %v", writeErr)
		}
	}
}

// ==================== Full pipeline ====================

// BenchmarkFullPipelineGray224 times the whole grayscale clip pipeline for
// the 872–895 s segment: extract, resample to 16000 when the source rate is
// higher, spectrogram, grayscale image, resize to 224x224, PNG write and WAV
// write. Only reading the source recording happens outside the timed loop.
//
// Read, temp-file, PNG and WAV failures stop the benchmark rather than being
// ignored.
func BenchmarkFullPipelineGray224(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
		outputSR := sr
		if sr > 16000 {
			segSamples = utils.ResampleRate(segSamples, sr, 16000)
			outputSR = 16000
		}
		cfg := utils.DefaultSpectrogramConfig(outputSR)
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		img := utils.CreateGrayscaleImage(spect)
		resized := utils.ResizeImage(img, 224, 224)

		f, err := os.CreateTemp("", "bench_*.png")
		if err != nil {
			b.Fatalf("create temp file: %v", err)
		}
		name := f.Name()
		pngErr := utils.WritePNG(resized, f)
		_ = f.Close()
		_ = os.Remove(name)
		if pngErr != nil {
			b.Fatalf("write PNG: %v", pngErr)
		}

		// The WAV reuses the temp name that was just freed, as before.
		wavErr := utils.WriteWAVFile(name, segSamples, outputSR)
		_ = os.Remove(name)
		if wavErr != nil {
			b.Fatalf("write WAV: %v", wavErr)
		}
	}
}

// BenchmarkFullPipelineColor448 times the whole colour clip pipeline for the
// 872–895 s segment: extract, resample to 16000 when the source rate is
// higher, spectrogram, L4 colormap, RGB image, resize to 448x448, PNG write
// and WAV write. Only reading the source recording happens outside the timed
// loop.
//
// Read, temp-file, PNG and WAV failures stop the benchmark rather than being
// ignored.
func BenchmarkFullPipelineColor448(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("read %s: %v", benchWAV, err)
	}
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
		outputSR := sr
		if sr > 16000 {
			segSamples = utils.ResampleRate(segSamples, sr, 16000)
			outputSR = 16000
		}
		cfg := utils.DefaultSpectrogramConfig(outputSR)
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		colorData := utils.ApplyL4Colormap(spect)
		img := utils.CreateRGBImage(colorData)
		resized := utils.ResizeImage(img, 448, 448)

		f, err := os.CreateTemp("", "bench_*.png")
		if err != nil {
			b.Fatalf("create temp file: %v", err)
		}
		name := f.Name()
		pngErr := utils.WritePNG(resized, f)
		_ = f.Close()
		_ = os.Remove(name)
		if pngErr != nil {
			b.Fatalf("write PNG: %v", pngErr)
		}

		// The WAV reuses the temp name that was just freed, as before.
		wavErr := utils.WriteWAVFile(name, segSamples, outputSR)
		_ = os.Remove(name)
		if wavErr != nil {
			b.Fatalf("write WAV: %v", wavErr)
		}
	}
}

// ==================== Data dimension report ====================

// TestPipelineDimensions logs the data sizes at each stage of the clip
// pipeline for the 872–895 s segment: sample counts, the expected and actual
// spectrogram dimensions, and the image sizes before and after resizing.
//
// It asserts nothing about those sizes, but it fails cleanly when the
// recording cannot be read or no spectrogram is produced, instead of panicking
// on the spect[0] access.
func TestPipelineDimensions(t *testing.T) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		t.Fatalf("read %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)

	t.Logf("Input: %d samples, sr=%d, segment=%d samples (%.1fs)",
		len(samples), sr, len(segSamples), float64(len(segSamples))/float64(sr))

	cfg := utils.DefaultSpectrogramConfig(16000)
	numFrames := (len(segSamples)-cfg.WindowSize)/cfg.HopSize + 1
	numBins := cfg.WindowSize/2 + 1
	t.Logf("Spectrogram: %d freq bins x %d time frames = %d values",
		numBins, numFrames, numBins*numFrames)

	spect := utils.GenerateSpectrogram(segSamples, cfg)
	if len(spect) == 0 {
		t.Fatal("empty spectrogram")
	}
	t.Logf("Output: %d x %d (freq x time)", len(spect), len(spect[0]))

	img := utils.CreateGrayscaleImage(spect)
	t.Logf("Grayscale image: %dx%d pixels, %d bytes",
		img.Bounds().Dx(), img.Bounds().Dy(), img.Bounds().Dx()*img.Bounds().Dy())

	resized := utils.ResizeImage(img, 224, 224)
	t.Logf("Resized 224: %dx%d", resized.Bounds().Dx(), resized.Bounds().Dy())

	resized448 := utils.ResizeImage(img, 448, 448)
	t.Logf("Resized 448: %dx%d", resized448.Bounds().Dx(), resized448.Bounds().Dy())
}
file deletion: calls_clip.go (----------)

[6.248737]→[6.584765:584802](∅→∅),[6.584802]→[6.574094:574094](∅→∅)

package tools

import (
"fmt"
"image"
"math"
"os"
"path/filepath"
"runtime"
"strings"
"sync"

"skraak/utils"
)

// CallsClipInput defines the input for the clip tool
type CallsClipInput struct {
	// File is a single .data file to process. When set, Folder is not searched.
	File string `json:"file"`
	// Folder is searched for .data files when File is empty. One of File or
	// Folder is required.
	Folder string `json:"folder"`
	// Output is the folder clips are written to; it is created if missing. Required.
	Output string `json:"output"`
	// Prefix is the leading part of every generated clip filename. Required.
	Prefix string `json:"prefix"`
	// Filter, Species and Certainty select which segments are clipped.
	Filter string `json:"filter"`
	// Species is split into a species name and call type before matching.
	Species string `json:"species"`
	Certainty int `json:"certainty"`
	// Size is the requested image width and height; it is clamped to a valid range.
	Size int `json:"size"`
	// Color selects the L4-colormap RGB image instead of the grayscale one.
	Color bool `json:"color"`

	// Night keeps only recordings reported as solar night; Day keeps only
	// those reported as diurnally active. Other recordings are skipped.
	Night bool `json:"night"`
	Day bool `json:"day"`
	// Location is an optional string that is parsed into latitude, longitude
	// and timezone for the Night/Day check.
	Location string `json:"location,omitempty"`
}

// CallsClipOutput defines the output for the clip tool
type CallsClipOutput struct {
	// FilesProcessed counts .data files that produced at least one clip or
	// reported no errors.
	FilesProcessed int `json:"files_processed"`
	// SegmentsClipped grows by the number of clip paths returned for each file.
	// NOTE(review): this counts returned paths, not segments — confirm intent.
	SegmentsClipped int `json:"segments_clipped"`
	// NightSkipped receives the skipped count when the Night filter is on;
	// otherwise the count goes to DaySkipped.
	NightSkipped int `json:"night_skipped,omitempty"`
	DaySkipped int `json:"day_skipped,omitempty"`
	// OutputFiles lists the paths of the generated clip files.
	OutputFiles []string `json:"output_files"`
	// Errors collects validation, setup and per-file error messages.
	Errors []string `json:"errors,omitempty"`
}

// CallsClip processes .data files and generates audio/image clips for
// matching segments.
//
// It validates the required flags, splits input.Species into species and call
// type, parses input.Location (when given) into latitude, longitude and
// timezone for the day/night filter, resolves the .data files to process, and
// creates the output folder. One or two files are processed sequentially;
// larger batches use worker goroutines.
//
// Validation and setup failures are returned as the error and are also
// appended to the returned output's Errors. Per-file failures are reported
// only through Errors, and the call still returns a nil error.
func CallsClip(input CallsClipInput) (CallsClipOutput, error) {
	var output CallsClipOutput

	// Validate required flags
	if err := validateClipInput(&output, input); err != nil {
		return output, err
	}

	// Parse species+calltype
	speciesName, callType := utils.ParseSpeciesCallType(input.Species)

	// Parse location into lat/lng/timezone. This happens before any file
	// system work so a malformed location fails without side effects.
	var lat, lng float64
	var timezone string
	if input.Location != "" {
		var err error
		lat, lng, timezone, err = utils.ParseLocation(input.Location)
		if err != nil {
			output.Errors = append(output.Errors, err.Error())
			return output, err
		}
	}

	// Get list of .data files
	filePaths, err := resolveClipFiles(&output, input)
	if err != nil {
		return output, err
	}

	// Create output folder if it doesn't exist
	if err := os.MkdirAll(input.Output, 0755); err != nil {
		output.Errors = append(output.Errors, fmt.Sprintf("failed to create output folder: %v", err))
		return output, err
	}

	// Clamp image size to valid range
	imgSize := utils.ClampImageSize(input.Size)

	// Process .data files (parallel for larger batches)
	if len(filePaths) <= 2 {
		processFilesSequential(&output, filePaths, input, speciesName, callType, imgSize, lat, lng, timezone)
	} else {
		processFilesParallel(&output, filePaths, input, speciesName, callType, imgSize, lat, lng, timezone)
	}

	return output, nil
}

// validateClipInput checks that the flags clip generation cannot run without
// are present: a source (--file or --folder), --output and --prefix. The first
// missing one is recorded in output.Errors and returned as an error.
func validateClipInput(output *CallsClipOutput, input CallsClipInput) error {
	switch {
	case input.File == "" && input.Folder == "":
		output.Errors = append(output.Errors, "either --file or --folder is required")
		return fmt.Errorf("missing required flag: --file or --folder")
	case input.Output == "":
		output.Errors = append(output.Errors, "--output is required")
		return fmt.Errorf("missing required flag: --output")
	case input.Prefix == "":
		output.Errors = append(output.Errors, "--prefix is required")
		return fmt.Errorf("missing required flag: --prefix")
	}
	return nil
}

// resolveClipFiles works out which .data files to process. An explicit
// input.File wins and is returned alone; otherwise input.Folder is searched.
// A failed search or an empty result is recorded in output.Errors and
// returned as an error.
func resolveClipFiles(output *CallsClipOutput, input CallsClipInput) ([]string, error) {
	if input.File != "" {
		return []string{input.File}, nil
	}

	found, err := utils.FindDataFiles(input.Folder)
	switch {
	case err != nil:
		output.Errors = append(output.Errors, fmt.Sprintf("failed to find .data files: %v", err))
		return nil, err
	case len(found) == 0:
		output.Errors = append(output.Errors, "no .data files found")
		return nil, fmt.Errorf("no .data files found")
	}
	return found, nil
}

// processFilesSequential runs processFile over filePaths in order, folding
// each file's result into output before moving to the next.
func processFilesSequential(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
	for idx := range filePaths {
		clips, skipped, errs := processFile(
			filePaths[idx], input.Output, input.Prefix, input.Filter,
			speciesName, callType, input.Certainty, imgSize,
			input.Color, input.Night, input.Day, lat, lng, timezone,
		)
		accumulateFileResult(output, clips, skipped, errs, input.Night)
	}
}

// processFilesParallel processes .data files using worker goroutines.
//
// At most min(NumCPU, 8, len(filePaths)) workers pull paths from a job
// channel and push one result per file onto a results channel. Both channels
// are buffered to len(filePaths), so neither queuing jobs nor reporting
// results blocks. Results are merged into output on the calling goroutine, so
// output is never written concurrently.
func processFilesParallel(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
	type fileResult struct {
		clips   []string
		skipped int
		errs    []string
	}

	workers := min(runtime.NumCPU(), 8, len(filePaths))
	jobs := make(chan string, len(filePaths))
	results := make(chan fileResult, len(filePaths))

	var wg sync.WaitGroup
	for range workers {
		wg.Go(func() {
			for dataPath := range jobs {
				clips, skipped, errs := processFile(dataPath, input.Output, input.Prefix, input.Filter, speciesName, callType, input.Certainty, imgSize, input.Color, input.Night, input.Day, lat, lng, timezone)
				results <- fileResult{clips: clips, skipped: skipped, errs: errs}
			}
		})
	}

	for _, dataPath := range filePaths {
		jobs <- dataPath
	}
	// Closing jobs ends each worker's range loop. Without it the workers never
	// return, wg.Wait never completes, and the results loop below hangs.
	close(jobs)

	go func() {
		wg.Wait()
		close(results)
	}()

	for r := range results {
		accumulateFileResult(output, r.clips, r.skipped, r.errs, input.Night)
	}
}

// accumulateFileResult folds one file's outcome into the running totals.
// The skipped count goes to NightSkipped when the night filter is on and to
// DaySkipped otherwise. A file counts as processed unless it produced no
// clips and reported at least one error.
func accumulateFileResult(output *CallsClipOutput, clips []string, skipped int, errs []string, night bool) {
	skipCounter := &output.DaySkipped
	if night {
		skipCounter = &output.NightSkipped
	}
	*skipCounter += skipped

	output.SegmentsClipped += len(clips)
	output.OutputFiles = append(output.OutputFiles, clips...)
	output.Errors = append(output.Errors, errs...)

	failedOutright := len(clips) == 0 && len(errs) > 0
	if !failedOutright {
		output.FilesProcessed++
	}
}

// processFile handles one .data file: it parses the file, selects the
// segments that match the filters, optionally applies the day/night check to
// the companion WAV, then reads the WAV once and generates a clip per segment.
//
// It returns the generated clip paths, the number of recordings skipped by
// the day/night filter (0 or 1), and any error messages. A file with no
// matching segments, or whose day/night check failed, yields nothing at all.
func processFile(dataPath, outputDir, prefix, filter, speciesName, callType string, certainty, imgSize int, color, night, day bool, lat, lng float64, timezone string) ([]string, int, []string) {
	parsed, err := utils.ParseDataFile(dataPath)
	if err != nil {
		return nil, 0, []string{fmt.Sprintf("%s: failed to parse: %v", dataPath, err)}
	}

	// The WAV sits next to the .data file; its name minus the extension is
	// reused in the clip filenames.
	wavPath := filepath.Clean(strings.TrimSuffix(dataPath, ".data"))
	stem := filepath.Base(wavPath)
	stem = strings.TrimSuffix(stem, filepath.Ext(stem))

	wanted := filterSegments(parsed.Segments, filter, speciesName, callType, certainty)
	if len(wanted) == 0 {
		return nil, 0, nil
	}

	// The day/night decision is made before the full audio is loaded.
	if night || day {
		skip, filterErr := checkDayNightFilter(wavPath, night, day, lat, lng, timezone)
		if skip {
			return nil, 1, nil
		}
		if filterErr != nil {
			// checkDayNightFilter has already written a warning to stderr.
			return nil, 0, nil
		}
	}

	samples, sampleRate, err := utils.ReadWAVSamples(wavPath)
	if err != nil {
		return nil, 0, []string{fmt.Sprintf("%s: failed to read WAV: %v", dataPath, err)}
	}

	clips, clipErrs := processSegments(wanted, dataPath, samples, sampleRate, outputDir, prefix, stem, imgSize, color)
	return clips, 0, clipErrs
}

// filterSegments keeps, in their original order, the segments for which
// SegmentMatchesFilters accepts the given filter, species, call type and
// certainty. The result is nil when nothing matches.
func filterSegments(segments []*utils.Segment, filter, speciesName, callType string, certainty int) []*utils.Segment {
	var kept []*utils.Segment
	for idx := range segments {
		candidate := segments[idx]
		if !candidate.SegmentMatchesFilters(filter, speciesName, callType, certainty) {
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}

// checkDayNightFilter applies day/night filtering. Returns (skipped=true, nil) if the
// recording should be skipped, (false, nil) if it passes, or (false, err) on failure.
func checkDayNightFilter(wavPath string, night, day bool, lat, lng float64, timezone string) (bool, error) {
result, err := IsNight(IsNightInput{
FilePath: wavPath,
Lat: lat,
Lng: lng,
Timezone: timezone,
})
if err != nil {
fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
return false, err
}
if night && !result.SolarNight {
fmt.Fprintf(os.Stderr, "skipped (daytime): %s\n", wavPath)
return true, nil
}
if day && !result.DiurnalActive {
fmt.Fprintf(os.Stderr, "skipped (nighttime): %s\n", wavPath)
return true, nil
}
return false, nil
}

// processSegments produces clips for the given segments. Batches of more
// than two segments are handed to processSegmentsParallel; smaller ones are
// clipped one after another here. It returns the generated file paths and one
// message per segment that failed.
func processSegments(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
	if len(segments) > 2 {
		return processSegmentsParallel(segments, dataPath, samples, sampleRate, outputDir, prefix, basename, imgSize, color)
	}

	var written, failures []string
	for _, s := range segments {
		paths, err := generateClip(samples, sampleRate, outputDir, prefix, basename, s.StartTime, s.EndTime, imgSize, color)
		if err == nil {
			written = append(written, paths...)
			continue
		}
		failures = append(failures, fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, s.StartTime, s.EndTime, err))
	}
	return written, failures
}

// processSegmentsParallel clips segments concurrently. Up to
// min(NumCPU, len(segments)) workers take segments from a queue and report
// either the generated paths or a formatted error message. Both channels are
// buffered to len(segments). Results arrive in completion order, not segment
// order.
func processSegmentsParallel(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
	type outcome struct {
		clips []string
		err   string
	}

	total := len(segments)
	queue := make(chan *utils.Segment, total)
	done := make(chan outcome, total)

	var pool sync.WaitGroup
	for range min(runtime.NumCPU(), total) {
		pool.Go(func() {
			for s := range queue {
				var res outcome
				paths, err := generateClip(samples, sampleRate, outputDir, prefix, basename, s.StartTime, s.EndTime, imgSize, color)
				if err != nil {
					res.err = fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, s.StartTime, s.EndTime, err)
				} else {
					res.clips = paths
				}
				done <- res
			}
		})
	}

	for idx := range segments {
		queue <- segments[idx]
	}
	close(queue)

	// Close the result channel once every worker has drained the queue.
	go func() {
		pool.Wait()
		close(done)
	}()

	var written, failures []string
	for res := range done {
		if res.err != "" {
			failures = append(failures, res.err)
			continue
		}
		written = append(written, res.clips...)
	}
	return written, failures
}

// generateClip generates PNG and WAV files for a segment.
//
// The segment between startTime and endTime is cut from samples, resampled to
// utils.DefaultMaxSampleRate when the source rate is higher, rendered as a
// spectrogram image (L4 colormap when color is true, grayscale otherwise),
// resized to imgSize x imgSize and written as a PNG. The same, possibly
// resampled, audio is then written as a WAV.
//
// Both files are named <prefix>_<basename>_<start>_<end> in outputDir, with
// start floored and end ceiled to whole numbers. The PNG is created with
// O_EXCL, so an existing file is reported as an error and never overwritten.
//
// On success the returned slice holds the PNG path followed by the WAV path.
func generateClip(samples []float64, sampleRate int, outputDir, prefix, basename string, startTime, endTime float64, imgSize int, color bool) ([]string, error) {
	var files []string

	// Calculate integer times for filename
	startInt := int(math.Floor(startTime))
	endInt := int(math.Ceil(endTime))

	// Build base filename
	baseName := fmt.Sprintf("%s_%s_%d_%d", prefix, basename, startInt, endInt)
	wavPath := filepath.Join(outputDir, baseName+".wav")
	pngPath := filepath.Join(outputDir, baseName+".png")

	// Extract segment samples
	segSamples := utils.ExtractSegmentSamples(samples, sampleRate, startTime, endTime)
	if len(segSamples) == 0 {
		return nil, fmt.Errorf("no samples in segment")
	}

	// Determine output sample rate (downsample if above the maximum)
	outputSampleRate := sampleRate
	if sampleRate > utils.DefaultMaxSampleRate {
		segSamples = utils.ResampleRate(segSamples, sampleRate, utils.DefaultMaxSampleRate)
		outputSampleRate = utils.DefaultMaxSampleRate
	}

	config := utils.DefaultSpectrogramConfig(outputSampleRate)
	spectrogram := utils.GenerateSpectrogram(segSamples, config)
	if spectrogram == nil {
		return nil, fmt.Errorf("failed to generate spectrogram")
	}

	// Create image (grayscale or color)
	var img image.Image
	if color {
		colorData := utils.ApplyL4Colormap(spectrogram)
		img = utils.CreateRGBImage(colorData)
	} else {
		img = utils.CreateGrayscaleImage(spectrogram)
	}
	if img == nil {
		return nil, fmt.Errorf("failed to create image")
	}

	resized := utils.ResizeImage(img, imgSize, imgSize)

	// Write PNG (O_EXCL fails atomically if file exists)
	pngFile, err := os.OpenFile(pngPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		if os.IsExist(err) {
			return nil, fmt.Errorf("file already exists: %s", pngPath)
		}
		return nil, fmt.Errorf("failed to create PNG: %w", err)
	}
	if err := utils.WritePNG(resized, pngFile); err != nil {
		_ = pngFile.Close()
		return nil, fmt.Errorf("failed to write PNG: %w", err)
	}
	if err := pngFile.Close(); err != nil {
		return nil, fmt.Errorf("failed to close PNG: %w", err)
	}
	// Report the PNG as well, so callers see every file that was produced.
	files = append(files, pngPath)

	// Write WAV
	if err := utils.WriteWAVFile(wavPath, segSamples, outputSampleRate); err != nil {
		return nil, fmt.Errorf("failed to write WAV: %w", err)
	}
	files = append(files, wavPath)

	return files, nil
}
files = append(files, pngPath)
close(jobs)
// Parse location into lat/lng/timezone
var lat, lng float64
var timezone string
if input.Location != "" {
var err error
lat, lng, timezone, err = utils.ParseLocation(input.Location)
if err != nil {
output.Errors = append(output.Errors, err.Error())
return output, err
}
}
file deletion: calls_classify_test.go (----------)

[6.248737]→[6.590621:590667](∅→∅),[6.590667]→[6.584804:584804](∅→∅)

package tools

import (
"testing"

"skraak/utils"
)

// NewClassifyState builds a ClassifyState over dataFiles and caches, per
// file, the segments that classification should visit. When config sets no
// Filter, no Species and a negative Certainty, every segment of every file is
// used as-is; otherwise only segments accepted by SegmentMatchesFilters are
// kept. The total number of cached segments is stored alongside.
func NewClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile) *ClassifyState {
	filtering := config.Filter != "" || config.Species != "" || config.Certainty >= 0

	perFile := make([][]*utils.Segment, len(dataFiles))
	count := 0
	for idx, file := range dataFiles {
		if filtering {
			for _, s := range file.Segments {
				if s.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
					perFile[idx] = append(perFile[idx], s)
				}
			}
		} else {
			perFile[idx] = file.Segments
		}
		count += len(perFile[idx])
	}

	return &ClassifyState{
		Config:       config,
		DataFiles:    dataFiles,
		filteredSegs: perFile,
		totalSegs:    count,
	}
}

func TestParseKeyBuffer(t *testing.T) {
bindings := []KeyBinding{
{Key: "k", Species: "Kiwi"},
{Key: "d", Species: "Kiwi", CallType: "Duet"},
{Key: "n", Species: "Don't Know"},
{Key: "p", Species: "Morepork"},
}

state := NewClassifyState(ClassifyConfig{Bindings: bindings, Certainty: -1}, nil)

tests := []struct {
key string
want *BindingResult
wantNil bool
}{
{"k", &BindingResult{Species: "Kiwi"}, false},
{"d", &BindingResult{Species: "Kiwi", CallType: "Duet"}, false},
{"n", &BindingResult{Species: "Don't Know"}, false},
{"p", &BindingResult{Species: "Morepork"}, false},
{"x", nil, true}, // unknown key
}

for _, tt := range tests {
got := state.ParseKeyBuffer(tt.key)
if tt.wantNil {
if got != nil {
t.Errorf("ParseKeyBuffer(%q) = %v, want nil", tt.key, got)
}
} else {
if got == nil {
t.Errorf("ParseKeyBuffer(%q) = nil, want %+v", tt.key, tt.want)
continue
}
if got.Species != tt.want.Species {
t.Errorf("ParseKeyBuffer(%q).Species = %q, want %q", tt.key, got.Species, tt.want.Species)
}
if got.CallType != tt.want.CallType {
t.Errorf("ParseKeyBuffer(%q).CallType = %q, want %q", tt.key, got.CallType, tt.want.CallType)
}
}
}
}

func TestApplyBinding(t *testing.T) {
bindings := []KeyBinding{
{Key: "k", Species: "Kiwi"},
{Key: "n", Species: "Don't Know"},
{Key: "d", Species: "Kiwi", CallType: "Duet"},
}

df := &utils.DataFile{
Meta: &utils.DataMeta{},
Segments: []*utils.Segment{
{
StartTime: 10.0,
EndTime: 20.0,
Labels: []*utils.Label{
{Species: "Unknown", Certainty: 50, Filter: "test-filter", CallType: "OldType"},
},
},
},
}

state := NewClassifyState(ClassifyConfig{
Filter: "test-filter",
Reviewer: "David",
Bindings: bindings,
Certainty: -1,
}, []*utils.DataFile{df})

// Apply "k" = Kiwi (no calltype, should remove existing calltype)
result := &BindingResult{Species: "Kiwi"}
state.ApplyBinding(result)

// Check label was updated
if len(df.Segments[0].Labels) != 1 {
t.Errorf("expected 1 label, got %d", len(df.Segments[0].Labels))
}
if df.Segments[0].Labels[0].Species != "Kiwi" {
t.Errorf("expected Species=Kiwi, got %s", df.Segments[0].Labels[0].Species)
}
if df.Segments[0].Labels[0].Certainty != 100 {
t.Errorf("expected Certainty=100, got %d", df.Segments[0].Labels[0].Certainty)
}
if df.Segments[0].Labels[0].CallType != "" {
t.Errorf("expected CallType='', got %s (should be removed)", df.Segments[0].Labels[0].CallType)
}
if df.Meta.Reviewer != "David" {
t.Errorf("expected Reviewer=David, got %s", df.Meta.Reviewer)
}

// Apply "d" = Kiwi/Duet (should set calltype)
result = &BindingResult{Species: "Kiwi", CallType: "Duet"}
state.ApplyBinding(result)

if df.Segments[0].Labels[0].CallType != "Duet" {
t.Errorf("expected CallType=Duet, got %s", df.Segments[0].Labels[0].CallType)
}

// Apply "n" = Don't Know (certainty should be 0)
result = &BindingResult{Species: "Don't Know"}
state.ApplyBinding(result)

if df.Segments[0].Labels[0].Species != "Don't Know" {
t.Errorf("expected Species=Don't Know, got %s", df.Segments[0].Labels[0].Species)
}
if df.Segments[0].Labels[0].Certainty != 0 {
t.Errorf("expected Certainty=0 for Don't Know, got %d", df.Segments[0].Labels[0].Certainty)
}
}

func TestApplyBindingCallTypeRemoval(t *testing.T) {
bindings := []KeyBinding{
{Key: "k", Species: "Kiwi"}, // no calltype
}

df := &utils.DataFile{
Meta: &utils.DataMeta{},
Segments: []*utils.Segment{
{
StartTime: 10.0,
EndTime: 20.0,
Labels: []*utils.Label{
{Species: "Kiwi", Certainty: 100, Filter: "test-filter", CallType: "Male"},
},
},
},
}

state := NewClassifyState(ClassifyConfig{
Filter: "test-filter",
Reviewer: "David",
Bindings: bindings,
Certainty: -1,
}, []*utils.DataFile{df})

// Apply "k" = Kiwi (should remove Male calltype)
result := &BindingResult{Species: "Kiwi"}
state.ApplyBinding(result)

if df.Segments[0].Labels[0].CallType != "" {
t.Errorf("expected CallType='', got %s (should be removed)", df.Segments[0].Labels[0].CallType)
}
}

// TestConfirmLabelDontKnow checks that confirming a "Don't Know" label with
// certainty 0 changes nothing: ConfirmLabel reports false, the label keeps its
// species and certainty, and the state is not marked dirty.
func TestConfirmLabelDontKnow(t *testing.T) {
	df := &utils.DataFile{
		Meta: &utils.DataMeta{},
		Segments: []*utils.Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*utils.Label{
					{Species: "Don't Know", Certainty: 0, Filter: "test-filter"},
				},
			},
		},
	}

	cfg := ClassifyConfig{
		Filter:    "test-filter",
		Reviewer:  "David",
		Certainty: -1,
	}
	state := NewClassifyState(cfg, []*utils.DataFile{df})

	// ConfirmLabel on Don't Know should be a no-op
	if confirmed := state.ConfirmLabel(); confirmed {
		t.Error("ConfirmLabel() should return false for Don't Know (certainty=0)")
	}

	after := df.Segments[0].Labels[0]
	if after.Species != "Don't Know" {
		t.Errorf("Species should remain Don't Know, got %s", after.Species)
	}
	if after.Certainty != 0 {
		t.Errorf("Certainty should remain 0, got %d", after.Certainty)
	}
	if state.Dirty {
		t.Error("State should not be dirty after confirming Don't Know")
	}
}
file deletion: calls_classify_load_test.go (----------)

[6.248737]→[6.597282:597333](∅→∅),[6.597333]→[6.590669:590669](∅→∅)

package tools

import (
"os"
"path/filepath"
"testing"
)

// writeDataFileContent creates a .data file in dir with the given raw content.
// The file is written with mode 0644; any write failure aborts the calling
// test through t.Fatal.
func writeDataFileContent(t *testing.T, dir, name, content string) {
	t.Helper()
	if err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0644); err != nil {
		t.Fatal(err)
	}
}

// mustLoadDataFiles is a test helper that calls LoadDataFiles and fatals on error.
func mustLoadDataFiles(t *testing.T, config ClassifyConfig) *ClassifyState {
t.Helper()
state, err := LoadDataFiles(config)
if err != nil {
t.Fatal(err)
}

// assertFileSegCounts reports a test error, prefixed with label, when the
// number of loaded data files or the total segment count differs from the
// expected values.
func assertFileSegCounts(t *testing.T, state *ClassifyState, wantFiles, wantSegs int, label string) {
	t.Helper()
	if gotFiles := len(state.DataFiles); gotFiles != wantFiles {
		t.Errorf("%s: expected %d files, got %d", label, wantFiles, gotFiles)
	}
	if gotSegs := state.TotalSegments(); gotSegs != wantSegs {
		t.Errorf("%s: expected %d segments total, got %d", label, wantSegs, gotSegs)
	}
}

// Raw .data file contents used by the load tests: a metadata object followed
// by a single segment carrying one label at certainty 90.
const (
	// kiwiSeg holds one segment labelled "Kiwi".
	kiwiSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]]]`
	// tomtitSeg holds one segment labelled "Tomtit".
	tomtitSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
)

func TestLoadDataFilesFiltersFilesWithNoMatchingSegments(t *testing.T) {
tempDir := t.TempDir()

writeDataFileContent(t, tempDir, "file1.data", kiwiSeg)
writeDataFileContent(t, tempDir, "file2.data", tomtitSeg)
writeDataFileContent(t, tempDir, "file3.data", kiwiSeg)

t.Run("no_filter", func(t *testing.T) {
state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Certainty: -1})
assertFileSegCounts(t, state, 3, 3, "No filter")
})

t.Run("species_kiwi", func(t *testing.T) {
state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})
assertFileSegCounts(t, state, 2, 2, "Species=Kiwi")
})

t.Run("species_tomtit", func(t *testing.T) {
state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Tomtit", Certainty: -1})
assertFileSegCounts(t, state, 1, 1, "Species=Tomtit")
})

t.Run("species_nonexistent", func(t *testing.T) {
state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "NonExistent", Certainty: -1})
assertFileSegCounts(t, state, 0, 0, "Species=NonExistent")
})
}

// TestLoadDataFilesWithMixedSegments loads one file containing two Kiwi
// segments and one Tomtit segment with a Kiwi filter. The file must stay
// loaded with all three segments, while the filtered total counts only the
// two Kiwi ones.
func TestLoadDataFilesWithMixedSegments(t *testing.T) {
	tempDir := t.TempDir()

	content := `[
{"Operator": "test"},
[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],
[10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]],
[20, 30, 100, 1000, [{"species": "Kiwi", "certainty": 95}]]
]`
	writeDataFileContent(t, tempDir, "mixed.data", content)

	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})

	if n := len(state.DataFiles); n != 1 {
		t.Errorf("Expected 1 file, got %d", n)
	}
	if n := state.TotalSegments(); n != 2 {
		t.Errorf("Species=Kiwi: expected 2 segments, got %d", n)
	}

	// The unfiltered segment list on the DataFile is left intact; only the
	// cached, filtered view is narrowed to Kiwi.
	if n := len(state.DataFiles[0].Segments); n != 3 {
		t.Errorf("DataFile should have 3 segments internally, got %d", n)
	}

	// TotalSegments reads from the cached filtered segments.
	if n := state.TotalSegments(); n != 2 {
		t.Errorf("TotalSegments should return 2 Kiwi segments, got %d", n)
	}
}

// TestFilteringDoesNotModifyOriginalSegments verifies that loading with a
// species filter leaves the DataFile's own segment list untouched: both the
// Kiwi and the Tomtit segment are still there, in file order.
func TestFilteringDoesNotModifyOriginalSegments(t *testing.T) {
	tempDir := t.TempDir()

	content := `[
{"Operator": "test"},
[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],
[10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]]
]`
	writeDataFileContent(t, tempDir, "test.data", content)

	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})

	// The unfiltered list must still hold every segment from the file.
	all := state.DataFiles[0].Segments
	if n := len(all); n != 2 {
		t.Errorf("Original should have 2 segments, got %d", n)
	}

	// Collect the first label's species of every segment that has labels.
	var seen []string
	for idx := range all {
		if labels := all[idx].Labels; len(labels) > 0 {
			seen = append(seen, labels[0].Species)
		}
	}
	if len(seen) != 2 || seen[0] != "Kiwi" || seen[1] != "Tomtit" {
		t.Errorf("Original segments should have both species, got %v", seen)
	}
}

// TestLoadDataFilesCertaintyPruning loads two single-segment files that differ
// only in certainty and checks that a Certainty=100 scope keeps one file with
// one segment and leaves CurrentSegment usable.
func TestLoadDataFilesCertaintyPruning(t *testing.T) {
	dir := t.TempDir()

	fixtures := []struct{ name, body string }{
		{"file1.data", `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`},
		{"file2.data", `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 100}]]]`},
	}
	for _, f := range fixtures {
		writeDataFileContent(t, dir, f.name, f.body)
	}

	state := mustLoadDataFiles(t, ClassifyConfig{Folder: dir, Certainty: 100})

	assertFileSegCounts(t, state, 1, 1, "Certainty=100")

	// With file1 pruned, the cursor starts on a file that has a matching segment.
	if state.CurrentSegment() == nil {
		t.Error("CurrentSegment should not be nil after pruning")
	}
}
return state
}
}
file deletion: calls_classify_filter_test.go (----------)

[6.248737]→[6.605661:605714](∅→∅),[6.605714]→[6.597335:597335](∅→∅)

package tools

import (
"math/rand"
"testing"

"skraak/utils"
)

// TestTotalSegmentsRespectsFilters verifies that TotalSegments reflects the
// species/filter scoping handed to NewClassifyState.
func TestTotalSegmentsRespectsFilters(t *testing.T) {
	// oneLabel builds a segment from start to end that carries a single label.
	oneLabel := func(start, end float64, label *utils.Label) *utils.Segment {
		return &utils.Segment{StartTime: start, EndTime: end, Labels: []*utils.Label{label}}
	}

	// Fixture: file1 holds a Kiwi and a Tomtit segment, file2 holds one Kiwi segment.
	df1 := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			oneLabel(0, 10, &utils.Label{Species: "Kiwi", Filter: "model-1.0"}),
			oneLabel(10, 20, &utils.Label{Species: "Tomtit", Filter: "model-1.0"}),
		},
	}
	df2 := &utils.DataFile{
		FilePath: "/test/file2.data",
		Segments: []*utils.Segment{
			oneLabel(0, 10, &utils.Label{Species: "Kiwi", Filter: "model-1.0"}),
		},
	}

	// Each case receives a freshly allocated slice of the two files.
	both := func() []*utils.DataFile { return []*utils.DataFile{df1, df2} }

	// Case 1: no scoping, every segment counts.
	unscoped := NewClassifyState(ClassifyConfig{Certainty: -1}, both())
	if got := unscoped.TotalSegments(); got != 3 {
		t.Errorf("No filters: expected 3 segments, got %d", got)
	}

	// Case 2: species Kiwi, two segments.
	kiwi := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, both())
	if got := kiwi.TotalSegments(); got != 2 {
		t.Errorf("Species=Kiwi: expected 2 segments, got %d", got)
	}

	// Case 3: species Tomtit, one segment.
	tomtit := NewClassifyState(ClassifyConfig{Species: "Tomtit", Certainty: -1}, both())
	if got := tomtit.TotalSegments(); got != 1 {
		t.Errorf("Species=Tomtit: expected 1 segment, got %d", got)
	}

	// Case 4: filter name shared by all labels, three segments.
	byFilter := NewClassifyState(ClassifyConfig{Filter: "model-1.0", Certainty: -1}, both())
	if got := byFilter.TotalSegments(); got != 3 {
		t.Errorf("Filter=model-1.0: expected 3 segments, got %d", got)
	}

	// Case 5: a species nobody has, zero segments.
	missing := NewClassifyState(ClassifyConfig{Species: "NonExistent", Certainty: -1}, both())
	if got := missing.TotalSegments(); got != 0 {
		t.Errorf("Species=NonExistent: expected 0 segments, got %d", got)
	}

	// Case 6: filter and species combined; only the model-1.0 Kiwi segment matches.
	df3 := &utils.DataFile{
		FilePath: "/test/file3.data",
		Segments: []*utils.Segment{
			oneLabel(0, 10, &utils.Label{Species: "Kiwi", Filter: "model-1.0", CallType: "Duet"}),
			oneLabel(10, 20, &utils.Label{Species: "Kiwi", Filter: "model-2.0", CallType: "Male"}),
		},
	}
	combined := NewClassifyState(ClassifyConfig{Filter: "model-1.0", Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df3})
	if got := combined.TotalSegments(); got != 1 {
		t.Errorf("Filter=model-1.0 + Species=Kiwi: expected 1 segment, got %d", got)
	}
}

// TestCurrentSegmentNumberWithFilters checks the 1-based position reported
// when the cursor sits on the second file under a species scope.
func TestCurrentSegmentNumberWithFilters(t *testing.T) {
	// labelled builds a model-1.0 segment from start to end for the given species.
	labelled := func(start, end float64, species string) *utils.Segment {
		return &utils.Segment{
			StartTime: start,
			EndTime:   end,
			Labels:    []*utils.Label{{Species: species, Filter: "model-1.0"}},
		}
	}

	df1 := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{labelled(0, 10, "Kiwi"), labelled(10, 20, "Tomtit")},
	}
	df2 := &utils.DataFile{
		FilePath: "/test/file2.data",
		Segments: []*utils.Segment{labelled(0, 10, "Kiwi")},
	}

	// Scope to Kiwi and place the cursor on df2's first matching segment.
	// One Kiwi in df1 precedes it, so the expected position is 2.
	state := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df1, df2})
	state.FileIdx, state.SegmentIdx = 1, 0

	got := state.CurrentSegmentNumber()
	if got != 2 {
		t.Errorf("Species=Kiwi, at file 2, seg 0: expected current segment 2, got %d", got)
	}
}

// TestCertaintyFiltering checks TotalSegments under certainty scoping, alone
// and combined with a species scope.
func TestCertaintyFiltering(t *testing.T) {
	// labelled builds a model-1.0 segment with the given species and certainty.
	labelled := func(start, end float64, species string, certainty int) *utils.Segment {
		return &utils.Segment{
			StartTime: start,
			EndTime:   end,
			Labels:    []*utils.Label{{Species: species, Filter: "model-1.0", Certainty: certainty}},
		}
	}

	// Fixture: Kiwi@70, Kiwi@100, Tomtit@70.
	df := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			labelled(0, 10, "Kiwi", 70),
			labelled(10, 20, "Kiwi", 100),
			labelled(20, 30, "Tomtit", 70),
		},
	}
	only := func() []*utils.DataFile { return []*utils.DataFile{df} }

	// Case 1: certainty 70 matches two segments.
	at70 := NewClassifyState(ClassifyConfig{Certainty: 70}, only())
	if got := at70.TotalSegments(); got != 2 {
		t.Errorf("Certainty=70: expected 2 segments, got %d", got)
	}

	// Case 2: certainty 100 matches one segment.
	at100 := NewClassifyState(ClassifyConfig{Certainty: 100}, only())
	if got := at100.TotalSegments(); got != 1 {
		t.Errorf("Certainty=100: expected 1 segment, got %d", got)
	}

	// Case 3: certainty 0 matches nothing in this fixture.
	at0 := NewClassifyState(ClassifyConfig{Certainty: 0}, only())
	if got := at0.TotalSegments(); got != 0 {
		t.Errorf("Certainty=0: expected 0 segments, got %d", got)
	}

	// Case 4: species and certainty together.
	kiwi70 := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: 70}, only())
	if got := kiwi70.TotalSegments(); got != 1 {
		t.Errorf("Species=Kiwi + Certainty=70: expected 1 segment, got %d", got)
	}
}

// TestSampling exercises applySampling on 10 segments spread over two files
// at several percentages, always with a generator seeded with 42.
func TestSampling(t *testing.T) {
	// unitSegs returns n unlabelled one-second segments starting at 0.
	unitSegs := func(n int) []*utils.Segment {
		out := make([]*utils.Segment, 0, n)
		for i := 0; i < n; i++ {
			out = append(out, &utils.Segment{StartTime: float64(i), EndTime: float64(i + 1)})
		}
		return out
	}

	df1 := &utils.DataFile{FilePath: "/test/f1.data", Segments: unitSegs(6)}
	df2 := &utils.DataFile{FilePath: "/test/f2.data", Segments: unitSegs(4)}
	kept := []*utils.DataFile{df1, df2}
	cached := [][]*utils.Segment{df1.Segments, df2.Segments}

	// sizeOf sums the lengths of all per-file segment lists.
	sizeOf := func(groups [][]*utils.Segment) int {
		sum := 0
		for i := range groups {
			sum += len(groups[i])
		}
		return sum
	}
	seeded := func() *rand.Rand { return rand.New(rand.NewSource(42)) }

	// 50% of 10 -> 5, and the surviving files stay in path order.
	files50, segs50 := applySampling(kept, cached, 50, seeded())
	if got := sizeOf(segs50); got != 5 {
		t.Errorf("sample 50%%: expected 5, got %d", got)
	}
	for i := 1; i < len(files50); i++ {
		if files50[i].FilePath < files50[i-1].FilePath {
			t.Errorf("sample 50%%: files out of order at index %d", i)
		}
	}

	// 10% of 10 -> 1.
	_, segs10 := applySampling(kept, cached, 10, seeded())
	if got := sizeOf(segs10); got != 1 {
		t.Errorf("sample 10%%: expected 1, got %d", got)
	}

	// 1% of 10 rounds down to 0 and is clamped up to 1.
	_, segs1 := applySampling(kept, cached, 1, seeded())
	if got := sizeOf(segs1); got != 1 {
		t.Errorf("sample 1%%: expected 1 (clamped), got %d", got)
	}

	// 99% of 10 -> 9.
	_, segs99 := applySampling(kept, cached, 99, seeded())
	if got := sizeOf(segs99); got != 9 {
		t.Errorf("sample 99%%: expected 9, got %d", got)
	}
}

// TestCertaintyPruning builds a state by hand where the first file has no
// segment at the requested certainty and checks the total segment count.
func TestCertaintyPruning(t *testing.T) {
	// kiwiFile builds a one-segment Kiwi file whose label has the given certainty.
	kiwiFile := func(path string, certainty int) *utils.DataFile {
		return &utils.DataFile{
			FilePath: path,
			Segments: []*utils.Segment{{
				StartTime: 0,
				EndTime:   10,
				Labels:    []*utils.Label{{Species: "Kiwi", Filter: "model-1.0", Certainty: certainty}},
			}},
		}
	}

	// file1 (certainty 70) comes first and matches nothing; file2 (certainty 100) matches.
	df1 := kiwiFile("/test/file1.data", 70)
	df2 := kiwiFile("/test/file2.data", 100)

	// Historical bug: without pruning, CurrentSegment() returned nil on file1
	// even though TotalSegments() was greater than zero.
	state := NewClassifyState(ClassifyConfig{Certainty: 100}, []*utils.DataFile{df1, df2})

	// Only file2 contributes a certainty-100 segment.
	got := state.TotalSegments()
	if got != 1 {
		t.Errorf("Certainty=100: expected 1 segment, got %d", got)
	}

	// CurrentSegment is not asserted here: per the original note, pruning is
	// assumed to happen in LoadDataFiles, and this state was built manually.
}
}

func TestCallTypeNoneFiltering(t *testing.T) {
// Create test data: Kiwi with calltype, Kiwi without, Tomtit without
df := &utils.DataFile{
FilePath: "/test/file1.data",
Segments: []*utils.Segment{
{
StartTime: 0,
EndTime: 10,
Labels: []*utils.Label{
{Species: "Kiwi", Filter: "model-1.0", CallType: "Male"},
},
},
{
StartTime: 10,
EndTime: 20,
Labels: []*utils.Label{
{Species: "Kiwi", Filter: "model-1.0"}, // no calltype
},
},
{
StartTime: 20,
EndTime: 30,
Labels: []*utils.Label{
{Species: "Tomtit", Filter: "model-1.0"}, // no calltype, wrong species
},
},
},
}

// Test 1: --species Kiwi+_ should match only Kiwi with no calltype (1 segment)
state1 := NewClassifyState(ClassifyConfig{Species: "Kiwi", CallType: utils.CallTypeNone, Certainty: -1}, []*utils.DataFile{df})
if got := state1.TotalSegments(); got != 1 {
t.Errorf("Species=Kiwi+_: expected 1 segment, got %d", got)
}

// Test 2: --species Kiwi should still match all Kiwi (2 segments)
state2 := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df})
if got := state2.TotalSegments(); got != 2 {
t.Errorf("Species=Kiwi: expected 2 segments, got %d", got)
}

// Test 3: --species Kiwi+Male should still work as before (1 segment)
state3 := NewClassifyState(ClassifyConfig{Species: "Kiwi", CallType: "Male", Certainty: -1}, []*utils.DataFile{df})
if got := state3.TotalSegments(); got != 1 {
t.Errorf("Species=Kiwi+Male: expected 1 segment, got %d", got)
}
file deletion: calls_classify.go (----------)

[6.248737]→[6.622528:622569](∅→∅),[6.622569]→[6.605716:605716](∅→∅)

package tools

import (
"fmt"
"math/rand"
"os"
"path/filepath"
"slices"
"sort"
"strings"
"time"

"skraak/utils"
)

// KeyBinding maps a key to a species/calltype.
// ParseKeyBuffer compares Key for exact equality with the pressed key and, on
// a match, copies Species and CallType into a BindingResult.
type KeyBinding struct {
Key string // single char: "k", "n", "p"
Species string // "Kiwi", "Don't Know", "Morepork"
CallType string // "Duet", "Female", "Male" (optional)
}

// ClassifyConfig holds the configuration for classification.
// It selects which .data files are loaded, which segments are in scope, and
// how edits made through ClassifyState are stamped.
type ClassifyConfig struct {
// Folder is scanned with utils.FindDataFiles when File is empty.
Folder string
// File, when non-empty, is used as the only .data path and Folder is ignored.
File string
// Filter is the label filter name: it is passed to GetFilterLabels and
// SegmentMatchesFilters, and is written onto labels created by edits.
Filter string
Species string // scope to this species (optional)
CallType string // scope to this calltype within species (optional)
Certainty int // scope to this certainty value, -1 = no filter (optional)
Sample int // random sample percentage 1-99, -1 = no sampling, 100 = no-op
Goto string // goto this file on startup (optional, basename match)
// Reviewer is written to the data file's Meta.Reviewer whenever an edit is applied.
Reviewer string
// Color, Sixel and ITerm are not read in this file.
// NOTE(review): presumably display options consumed by the TUI — confirm.
Color bool
ImageSize int // spectrogram display size in pixels (0 = default)
Sixel bool
ITerm bool
// Bindings are searched in order by ParseKeyBuffer; the first matching Key wins.
Bindings []KeyBinding
// SecondaryBindings maps a primary binding key to per-species calltype
// keys. Invoked via Shift+primary-key: the species is labeled without
// advancing, and the next key is interpreted as a calltype.
SecondaryBindings map[string]map[string]string
// Night and Day enable the time-of-day check in filterByTimeOfDay; Lat, Lng
// and Timezone are forwarded to IsNight for that check.
Night bool
Day bool
Lat float64
Lng float64
Timezone string
}

// ClassifyState holds the current state for TUI.
// FileIdx and SegmentIdx form a cursor: FileIdx indexes DataFiles (and the
// parallel filteredSegs), SegmentIdx indexes filteredSegs[FileIdx].
type ClassifyState struct {
Config ClassifyConfig
DataFiles []*utils.DataFile
filteredSegs [][]*utils.Segment // cached at load time, parallel to DataFiles
totalSegs int // pre-computed total segment count
FileIdx int
SegmentIdx int
// Dirty is set by the editing methods and cleared by a successful Save.
Dirty bool
// Player is not used by the methods in this file.
Player *utils.AudioPlayer
PlaybackSpeed float64 // Current playback speed (1.0 = normal, 0.5 = half speed)
TimeFilteredCount int // files skipped by --night or --day filter
}

// BindingResult represents parsed key result.
// It is produced by ParseKeyBuffer from the matching KeyBinding and consumed
// by ApplyBinding, which always overwrites the label's CallType with CallType.
type BindingResult struct {
Species string
CallType string // empty string = remove calltype
}

// findDataFilePaths resolves the list of .data file paths from config.
// An explicit config.File is returned as the sole entry; otherwise
// config.Folder is scanned with utils.FindDataFiles and any scan error is
// wrapped and returned.
func findDataFilePaths(config ClassifyConfig) ([]string, error) {
	if config.File == "" {
		found, err := utils.FindDataFiles(config.Folder)
		if err != nil {
			return nil, fmt.Errorf("find data files: %w", err)
		}
		return found, nil
	}
	return []string{config.File}, nil
}

// filterDataFileSegments applies segment and day/night filters to a single data file.
// Returns the filtered segments and whether the file should be kept.
// If the file is filtered out (no matching segments, or time-of-day), returns nil, false.
func filterDataFileSegments(df *utils.DataFile, config ClassifyConfig) ([]*utils.Segment, bool, int) {
segs := filterSegmentsByLabel(df.Segments, config)
if segs == nil {
return nil, false, 0
}

timeFiltered := 0
if config.Night || config.Day {
keep, tf := filterByTimeOfDay(df.FilePath, config)
if !keep {
return nil, false, tf
}
}

if len(filePaths) == 0 {
return nil, fmt.Errorf("no .data files found")
}

var dataFiles []*utils.DataFile
for _, path := range filePaths {
df, err := utils.ParseDataFile(path)
if err != nil {
continue
}
dataFiles = append(dataFiles, df)
}
if len(dataFiles) == 0 {
return nil, fmt.Errorf("no valid .data files")
}

sort.Slice(dataFiles, func(i, j int) bool {
return dataFiles[i].FilePath < dataFiles[j].FilePath
})

return dataFiles, nil
}

// filterDataFiles applies segment filters to each data file, returning kept files and their segments.
func filterDataFiles(dataFiles []*utils.DataFile, config ClassifyConfig) ([]*utils.DataFile, [][]*utils.Segment, int) {
var kept []*utils.DataFile
var cachedSegs [][]*utils.Segment
var timeFiltered int

for _, df := range dataFiles {
segs, keep, tf := filterDataFileSegments(df, config)
timeFiltered += tf
if !keep {
continue
}
kept = append(kept, df)
cachedSegs = append(cachedSegs, segs)
}

total := 0
for _, segs := range filteredSegs {
total += len(segs)
}

state := &ClassifyState{
Config: config,
DataFiles: dataFiles,
filteredSegs: filteredSegs,
totalSegs: total,
TimeFilteredCount: timeFiltered,
}

if config.Goto == "" {
return state, nil
}

for i, df := range state.DataFiles {
base := df.FilePath[strings.LastIndex(df.FilePath, "/")+1:]
if base == config.Goto {
state.FileIdx = i
return state, nil
}
}
return nil, fmt.Errorf("goto file not found (or has no matching segments): %s", config.Goto)
}

// applySampling randomly selects sample% of segments from the filtered set.
// The returned files and segments preserve the original chronological order.
//
// kept and cachedSegs are parallel: cachedSegs[i] holds the in-scope segments
// of kept[i]. The number of segments selected is total*sample/100 (integer
// division), clamped to the range [1, total]. Files that end up with no
// selected segment are dropped from the result. When there are no segments at
// all, nil, nil is returned (previously this case panicked because one
// element was sliced out of an empty list).
func applySampling(kept []*utils.DataFile, cachedSegs [][]*utils.Segment, sample int, rng *rand.Rand) ([]*utils.DataFile, [][]*utils.Segment) {
	type segRef struct{ fileIdx, segIdx int }

	total := 0
	for _, segs := range cachedSegs {
		total += len(segs)
	}
	if total == 0 {
		// Nothing to sample from; every file would be dropped anyway.
		return nil, nil
	}

	// Flatten to (file, segment) references so one shuffle covers all files.
	flat := make([]segRef, 0, total)
	for fi, segs := range cachedSegs {
		for si := range segs {
			flat = append(flat, segRef{fileIdx: fi, segIdx: si})
		}
	}

	// At least one segment, never more than exist.
	targetCount := min(max(total*sample/100, 1), total)

	rng.Shuffle(len(flat), func(i, j int) { flat[i], flat[j] = flat[j], flat[i] })
	selected := flat[:targetCount]

	// Restore chronological order before rebuilding
	sort.Slice(selected, func(i, j int) bool {
		if selected[i].fileIdx != selected[j].fileIdx {
			return selected[i].fileIdx < selected[j].fileIdx
		}
		return selected[i].segIdx < selected[j].segIdx
	})

	newCached := make([][]*utils.Segment, len(cachedSegs))
	for _, ref := range selected {
		newCached[ref.fileIdx] = append(newCached[ref.fileIdx], cachedSegs[ref.fileIdx][ref.segIdx])
	}

	// Keep only files that still have at least one sampled segment.
	var newKept []*utils.DataFile
	var finalCached [][]*utils.Segment
	for i, segs := range newCached {
		if len(segs) > 0 {
			newKept = append(newKept, kept[i])
			finalCached = append(finalCached, segs)
		}
	}
	return newKept, finalCached
}

// FilteredSegs returns the cached filtered segments parallel to DataFiles.
// The internal slice is returned directly, not a copy.
func (s *ClassifyState) FilteredSegs() [][]*utils.Segment {
return s.filteredSegs
}

// CurrentFile returns the data file under the cursor, or nil when FileIdx is
// at or beyond the end of DataFiles.
func (s *ClassifyState) CurrentFile() *utils.DataFile {
	if idx := s.FileIdx; idx < len(s.DataFiles) {
		return s.DataFiles[idx]
	}
	return nil
}

// CurrentSegment returns the filtered segment under the cursor, or nil when
// either FileIdx or SegmentIdx is at or beyond the end of its list.
func (s *ClassifyState) CurrentSegment() *utils.Segment {
	if s.FileIdx < len(s.filteredSegs) {
		if inFile := s.filteredSegs[s.FileIdx]; s.SegmentIdx < len(inFile) {
			return inFile[s.SegmentIdx]
		}
	}
	return nil
}

// TotalSegments returns total segments to review.
// The value is the pre-computed totalSegs field; nothing is recounted here.
func (s *ClassifyState) TotalSegments() int {
return s.totalSegs
}

// CurrentSegmentNumber returns the 1-based position of the cursor among all
// filtered segments: the segments of every earlier file plus SegmentIdx+1.
func (s *ClassifyState) CurrentSegmentNumber() int {
	position := s.SegmentIdx + 1
	for i := s.FileIdx - 1; i >= 0; i-- {
		position += len(s.filteredSegs[i])
	}
	return position
}

// NextSegment advances the cursor by one segment, stepping into the first
// segment of the next file when the current file is exhausted. It reports
// false, leaving the cursor untouched, when there is nowhere to go.
func (s *ClassifyState) NextSegment() bool {
	switch {
	case s.FileIdx >= len(s.filteredSegs):
		// Cursor is already past the last file.
		return false
	case s.SegmentIdx+1 < len(s.filteredSegs[s.FileIdx]):
		// Still inside the current file.
		s.SegmentIdx++
		return true
	case s.FileIdx+1 < len(s.DataFiles):
		// Roll over to the start of the next file.
		s.FileIdx++
		s.SegmentIdx = 0
		return true
	default:
		return false
	}
}

// PrevSegment steps the cursor back by one segment, landing on the last
// segment of the previous file when already at the start of the current one.
// It reports false when the cursor is at the very first position.
func (s *ClassifyState) PrevSegment() bool {
	if s.SegmentIdx > 0 {
		s.SegmentIdx--
		return true
	}
	if s.FileIdx <= 0 {
		return false
	}

	// Step into the previous file; an empty file still yields index 0.
	s.FileIdx--
	lastIdx := len(s.filteredSegs[s.FileIdx]) - 1
	s.SegmentIdx = max(lastIdx, 0)
	return true
}

// ParseKeyBuffer looks key up in the configured bindings and returns the
// species/calltype of the first binding whose Key equals it, or nil when no
// binding matches.
func (s *ClassifyState) ParseKeyBuffer(key string) *BindingResult {
	bindings := s.Config.Bindings
	for i := range bindings {
		if bindings[i].Key != key {
			continue
		}
		return &BindingResult{
			Species:  bindings[i].Species,
			CallType: bindings[i].CallType,
		}
	}
	return nil
}

// SetComment stores comment on the first label of the current segment that
// matches the configured filter and returns the comment it replaced. When no
// label matches, a new "Don't Know" label (certainty 0) carrying the comment
// is appended and "" is returned. With no current segment or file it does
// nothing and returns "".
func (s *ClassifyState) SetComment(comment string) string {
	seg := s.CurrentSegment()
	if seg == nil {
		return ""
	}
	df := s.CurrentFile()
	if df == nil {
		return ""
	}

	// Every edit stamps the configured reviewer on the file.
	df.Meta.Reviewer = s.Config.Reviewer

	if matches := seg.GetFilterLabels(s.Config.Filter); len(matches) > 0 {
		previous := matches[0].Comment
		matches[0].Comment = comment
		s.Dirty = true
		return previous
	}

	// Nothing to attach the comment to yet: create a placeholder label.
	seg.Labels = append(seg.Labels, &utils.Label{
		Species:   "Don't Know",
		Certainty: 0,
		Filter:    s.Config.Filter,
		Comment:   comment,
	})
	s.Dirty = true
	return ""
}

// GetCurrentComment returns the comment of the first filter-matching label on
// the current segment, or "" when there is no current segment or no match.
func (s *ClassifyState) GetCurrentComment() string {
	if seg := s.CurrentSegment(); seg != nil {
		if matches := seg.GetFilterLabels(s.Config.Filter); len(matches) != 0 {
			return matches[0].Comment
		}
	}
	return ""
}

// ApplyBinding writes result onto the current segment. The first label that
// matches the configured filter is overwritten (species, certainty, calltype)
// and any further matching labels are removed; if none matches, a new label
// is appended. Certainty is 0 for "Don't Know" and 100 otherwise. Labels are
// then re-sorted by species and the state is marked dirty. It does nothing
// when there is no current segment or file.
func (s *ClassifyState) ApplyBinding(result *BindingResult) {
	seg := s.CurrentSegment()
	if seg == nil {
		return
	}
	df := s.CurrentFile()
	if df == nil {
		return
	}

	// Every edit stamps the configured reviewer on the file.
	df.Meta.Reviewer = s.Config.Reviewer

	matches := seg.GetFilterLabels(s.Config.Filter)

	certainty := 100
	if result.Species == "Don't Know" {
		certainty = 0
	}

	switch len(matches) {
	case 0:
		seg.Labels = append(seg.Labels, &utils.Label{
			Species:   result.Species,
			Certainty: certainty,
			Filter:    s.Config.Filter,
			CallType:  result.CallType,
		})
	default:
		primary := matches[0]
		primary.Species = result.Species
		primary.Certainty = certainty
		primary.CallType = result.CallType // always set (empty = remove)

		// Drop every other label that matched the filter.
		if extras := matches[1:]; len(extras) > 0 {
			var remaining []*utils.Label
			for _, candidate := range seg.Labels {
				if slices.Contains(extras, candidate) {
					continue
				}
				remaining = append(remaining, candidate)
			}
			seg.Labels = remaining
		}
	}

	// Keep labels ordered by species name.
	sort.Slice(seg.Labels, func(a, b int) bool {
		return seg.Labels[a].Species < seg.Labels[b].Species
	})

	s.Dirty = true
}

// ApplyCallTypeOnly sets the CallType of the first filter-matching label on
// the current segment, leaving species and certainty alone. It is the second
// half of the Shift+primary-key flow: the species was labeled by the first
// keypress and the calltype arrives with the next key.
// It is a no-op when there is no current segment, file, or matching label.
func (s *ClassifyState) ApplyCallTypeOnly(callType string) {
	seg := s.CurrentSegment()
	if seg == nil {
		return
	}
	df := s.CurrentFile()
	if df == nil {
		return
	}

	matches := seg.GetFilterLabels(s.Config.Filter)
	if len(matches) > 0 {
		df.Meta.Reviewer = s.Config.Reviewer
		matches[0].CallType = callType
		s.Dirty = true
	}
}

// HasSecondary reports whether at least one secondary (calltype) binding is
// configured under the given primary key.
func (s *ClassifyState) HasSecondary(primaryKey string) bool {
	secondary := s.Config.SecondaryBindings[primaryKey]
	return len(secondary) != 0
}

// ConfirmLabel raises the certainty of the current segment's first
// filter-matching label to 100 and reports whether anything changed (so the
// caller knows a write is needed). It returns false without modifying
// anything when there is no segment, no matching label, no current file, or
// when the label's certainty is already 100 or is 0 (a Don't Know is never
// confirmed; the caller should simply advance).
func (s *ClassifyState) ConfirmLabel() bool {
	seg := s.CurrentSegment()
	if seg == nil {
		return false
	}
	matches := seg.GetFilterLabels(s.Config.Filter)
	if len(matches) == 0 {
		return false
	}

	target := matches[0]
	switch target.Certainty {
	case 0, 100:
		return false
	}

	df := s.CurrentFile()
	if df == nil {
		return false
	}
	df.Meta.Reviewer = s.Config.Reviewer
	target.Certainty = 100
	s.Dirty = true
	return true
}

// Save writes the current data file back to its own path when the state is
// dirty, then clears the dirty flag. It returns nil without writing when
// there is no current file or nothing has changed, and returns the write
// error (leaving the state dirty) when the write fails.
func (s *ClassifyState) Save() error {
	df := s.CurrentFile()
	if df == nil || !s.Dirty {
		return nil
	}
	if err := df.Write(df.FilePath); err != nil {
		return err
	}
	s.Dirty = false
	return nil
}

// getFilterLabel returns the first label of seg whose Filter equals the
// configured filter. With no filter configured, the first label of any kind
// is returned. The result is nil when nothing qualifies.
func (s *ClassifyState) getFilterLabel(seg *utils.Segment) *utils.Label {
	want := s.Config.Filter
	for _, candidate := range seg.Labels {
		if want == "" || candidate.Filter == want {
			return candidate
		}
	}
	return nil
}

// getOrCreateFilterLabel returns the label getFilterLabel finds for seg. When
// there is none, it appends a new "Don't Know" label (certainty 0) carrying
// the configured filter, marks the state dirty, and returns that label.
func (s *ClassifyState) getOrCreateFilterLabel(seg *utils.Segment) *utils.Label {
	if existing := s.getFilterLabel(seg); existing != nil {
		return existing
	}

	created := &utils.Label{
		Species:   "Don't Know",
		Certainty: 0,
		Filter:    s.Config.Filter,
	}
	seg.Labels = append(seg.Labels, created)
	s.Dirty = true
	return created
}

// HasBookmark reports whether the current segment's filter label exists and
// is bookmarked. It is false when there is no current segment.
func (s *ClassifyState) HasBookmark() bool {
	if seg := s.CurrentSegment(); seg != nil {
		if label := s.getFilterLabel(seg); label != nil {
			return label.Bookmark
		}
	}
	return false
}

// ToggleBookmark flips the bookmark flag on the current segment's filter
// label, creating that label first if the segment has none. The reviewer is
// stamped on the file and the state is marked dirty. It does nothing when
// there is no current segment or file.
func (s *ClassifyState) ToggleBookmark() {
	seg := s.CurrentSegment()
	if seg == nil {
		return
	}
	df := s.CurrentFile()
	if df == nil {
		return
	}

	df.Meta.Reviewer = s.Config.Reviewer

	target := s.getOrCreateFilterLabel(seg)
	target.Bookmark = !target.Bookmark
	s.Dirty = true
}

// NextBookmark moves the cursor forward to the next bookmarked segment,
// wrapping from the last segment to the first. It returns true when the
// cursor lands on a bookmark, and false once the scan has come back round to
// the position it started from without finding one.
func (s *ClassifyState) NextBookmark() bool {
	originFile, originSeg := s.FileIdx, s.SegmentIdx

	for moved := false; ; moved = true {
		if !s.NextSegment() {
			// Ran off the end: continue from the very first segment.
			s.FileIdx, s.SegmentIdx = 0, 0
		}

		// After the first step, arriving at the origin means a full circle.
		if moved && s.FileIdx == originFile && s.SegmentIdx == originSeg {
			return false
		}

		if s.hasFilterBookmark() {
			return true
		}
	}
}

// PrevBookmark navigates to the previous bookmark, wrapping around if needed.
// Returns false if no bookmarks found (back at start position), and also
// when the state holds no files at all.
func (s *ClassifyState) PrevBookmark() bool {
	// With nothing loaded, the wrap-around below would set FileIdx to -1 and
	// index filteredSegs out of range; there is nothing to find anyway.
	if len(s.DataFiles) == 0 || len(s.filteredSegs) == 0 {
		return false
	}

	startFile := s.FileIdx
	startSeg := s.SegmentIdx
	first := true

	for {
		// Move to previous segment
		if !s.PrevSegment() {
			// Wrap to end of folder
			s.FileIdx = len(s.DataFiles) - 1
			segs := s.filteredSegs[s.FileIdx]
			s.SegmentIdx = max(len(segs)-1, 0)
		}

		// Check if we've looped back to start
		if !first && s.FileIdx == startFile && s.SegmentIdx == startSeg {
			return false // full circle, no bookmark found
		}
		first = false

		// Check if current segment has bookmark
		if s.hasFilterBookmark() {
			return true
		}
	}
}

// hasFilterBookmark reports whether the segment under the cursor has a
// filter-matching label with its Bookmark flag set.
func (s *ClassifyState) hasFilterBookmark() bool {
	seg := s.CurrentSegment()
	if seg == nil {
		return false
	}
	if label := s.getFilterLabel(seg); label != nil {
		return label.Bookmark
	}
	return false
}

// FormatLabels renders labels as a comma-separated display string. Each label
// becomes `Species[/CallType] (Certainty%)`, followed by ` [Filter]` when the
// label has a filter and ` "Comment"` when it has a comment. When filter is
// non-empty, only labels whose Filter equals it are included.
func FormatLabels(labels []*utils.Label, filter string) string {
	var rendered []string
	for _, l := range labels {
		if filter == "" || l.Filter == filter {
			var b strings.Builder
			b.WriteString(l.Species)
			if l.CallType != "" {
				b.WriteString("/")
				b.WriteString(l.CallType)
			}
			fmt.Fprintf(&b, " (%d%%)", l.Certainty)
			if l.Filter != "" {
				b.WriteString(" [")
				b.WriteString(l.Filter)
				b.WriteString("]")
			}
			if l.Comment != "" {
				fmt.Fprintf(&b, " \"%s\"", l.Comment)
			}
			rendered = append(rendered, b.String())
		}
	}
	return strings.Join(rendered, ", ")
}
// buildClassifyState constructs the ClassifyState, handling --goto file positioning.
func buildClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile, filteredSegs [][]*utils.Segment, timeFiltered int) (*ClassifyState, error) {
return kept, cachedSegs, timeFiltered
}
// LoadDataFiles loads all .data files for classification and builds the
// initial ClassifyState.
//
// Steps: parse and sort the files selected by config, keep only files and
// segments that pass the configured filters, optionally down-sample when
// config.Sample is between 1 and 99 inclusive (seeded from the current time),
// then hand the result to buildClassifyState. Errors from parsing or from
// buildClassifyState are returned unchanged.
func LoadDataFiles(config ClassifyConfig) (*ClassifyState, error) {
	// Path discovery is left to parseAndSortDataFiles; resolving the paths
	// here as well left an unused variable and a redundant scan.
	dataFiles, err := parseAndSortDataFiles(config)
	if err != nil {
		return nil, err
	}

	kept, cachedSegs, timeFiltered := filterDataFiles(dataFiles, config)

	if config.Sample > 0 && config.Sample < 100 {
		rng := rand.New(rand.NewSource(time.Now().UnixNano()))
		kept, cachedSegs = applySampling(kept, cachedSegs, config.Sample, rng)
	}

	return buildClassifyState(config, kept, cachedSegs, timeFiltered)
}

// parseAndSortDataFiles finds, parses, and sorts .data files from the config.
func parseAndSortDataFiles(config ClassifyConfig) ([]*utils.DataFile, error) {
// filterByTimeOfDay applies the --night/--day filter to one .data file.
// The recording path is derived by stripping the ".data" suffix and is passed
// to IsNight together with the configured location and timezone.
// Returns (keep, timeFilteredCount): (true, 0) when the file passes, and
// (false, 1) when it is rejected by the filter or IsNight fails (in which
// case a warning is printed to stderr).
func filterByTimeOfDay(dataFilePath string, config ClassifyConfig) (bool, int) {
	wavPath := filepath.Clean(strings.TrimSuffix(dataFilePath, ".data"))

	query := IsNightInput{
		FilePath: wavPath,
		Lat:      config.Lat,
		Lng:      config.Lng,
		Timezone: config.Timezone,
	}
	result, err := IsNight(query)

	switch {
	case err != nil:
		fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
		return false, 1
	case config.Night && !result.SolarNight:
		return false, 1
	case config.Day && !result.DiurnalActive:
		return false, 1
	}
	return true, 0
}

return segs, true, timeFiltered
}
}

// filterSegmentsByLabel applies label/species/certainty filters, returning matching segments.
// Returns nil if no segments match (caller should skip the file).
func filterSegmentsByLabel(segments []*utils.Segment, config ClassifyConfig) []*utils.Segment {
hasFilter := config.Filter != "" || config.Species != "" || config.Certainty >= 0
if !hasFilter {
return segments
}
var segs []*utils.Segment
for _, seg := range segments {
if seg.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
segs = append(segs, seg)
}
}
return segs // nil if empty, caller treats as "skip"
file deletion: bulk_file_import.go (----------)

[6.248737]→[6.638264:638307](∅→∅),[6.638307]→[6.622571:622571](∅→∅)

package tools

import (
"context"
"database/sql"
"encoding/csv"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"time"

"skraak/db"
"skraak/utils"
)

// BulkFileImportInput defines the input parameters for the bulk_file_import tool
type BulkFileImportInput struct {
// DatasetID is checked with utils.ValidateShortID before anything else.
DatasetID string `json:"dataset_id"`
// CSVPath is the CSV specification; it must be stat-able and is read by bulkReadCSV.
CSVPath string `json:"csv_path"`
// LogFilePath is opened in create/append/write-only mode for progress logging;
// its parent directory must be accessible.
LogFilePath string `json:"log_file_path"`
}

// BulkFileImportOutput defines the output structure for the bulk_file_import tool
type BulkFileImportOutput struct {
// TotalLocations is the number of locations loaded from the CSV.
TotalLocations int `json:"total_locations"`
// NOTE(review): the cluster counts are presumably taken from bulkCreateClusters'
// created/existing results — the assignment is not visible here; confirm.
ClustersCreated int `json:"clusters_created"`
ClustersExisting int `json:"clusters_existing"`
// The four file counters are copied from the statistics returned by bulkImportAllFiles.
TotalFilesScanned int `json:"total_files_scanned"`
FilesImported int `json:"files_imported"`
FilesDuplicate int `json:"files_duplicate"`
FilesError int `json:"files_error"`
// ProcessingTime is time.Since(start) rendered with Duration.String.
ProcessingTime string `json:"processing_time"`
// Errors accumulates error messages; omitted from JSON when empty.
Errors []string `json:"errors,omitempty"`
}

// bulkLocationData holds CSV row data for a location
// NOTE(review): field meanings below are inferred from their names; the CSV
// parsing that fills them is outside this view — confirm against bulkReadCSV.
type bulkLocationData struct {
LocationName string
LocationID string
DirectoryPath string
DateRange string
SampleRate int
FileCount int
}

// bulkImportStats tracks import statistics for a single cluster
// BulkFileImport also receives a value of this shape from bulkImportAllFiles
// and copies its four counters into BulkFileImportOutput.
type bulkImportStats struct {
TotalFiles int
ImportedFiles int
DuplicateFiles int
ErrorFiles int
}

// progressLogger mirrors every progress line to an open log file and to an
// in-memory buffer.
type progressLogger struct {
	file   *os.File
	buffer *strings.Builder
}

// Log formats a message, prefixes it with a "[YYYY-MM-DD HH:MM:SS] " local
// timestamp, terminates it with a newline, and appends it to both the log
// file and the buffer. The file is synced after each line. Failures to write
// or sync are reported on stderr and otherwise ignored, so logging problems
// never abort an import.
func (l *progressLogger) Log(format string, args ...any) {
	stamp := time.Now().Format("2006-01-02 15:04:05")
	line := "[" + stamp + "] " + fmt.Sprintf(format, args...) + "\n"

	_, writeErr := l.file.WriteString(line)
	if writeErr != nil {
		fmt.Fprintf(os.Stderr, "Warning: log write failed: %v\n", writeErr)
	}
	if syncErr := l.file.Sync(); syncErr != nil {
		fmt.Fprintf(os.Stderr, "Warning: log sync failed: %v\n", syncErr)
	}

	// The buffer keeps a copy for later error reporting.
	l.buffer.WriteString(line)
}

// BulkFileImport imports WAV files across multiple locations using CSV specification
func BulkFileImport(
ctx context.Context,
input BulkFileImportInput,
) (BulkFileImportOutput, error) {
startTime := time.Now()
var output BulkFileImportOutput

// Open log file
logFile, err := os.OpenFile(input.LogFilePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
if err != nil {
return output, fmt.Errorf("failed to open log file: %w", err)
}
defer func() { _ = logFile.Close() }()

logger := &progressLogger{
file: logFile,
buffer: &strings.Builder{},
}

logger.Log("Starting bulk file import for dataset %s", input.DatasetID)

// Phase 0: Validate input
logger.Log("Validating input parameters...")
if err := bulkValidateInput(input); err != nil {
logger.Log("ERROR: Validation failed: %v", err)
output.failOutput([]string{fmt.Sprintf("validation failed: %v", err)}, startTime)
return output, fmt.Errorf("validation failed: %w", err)
}
logger.Log("Validation complete")

// Phase 1: Read CSV
logger.Log("Reading CSV file: %s", input.CSVPath)
locations, err := bulkReadCSV(input.CSVPath)
if err != nil {
logger.Log("ERROR: Failed to read CSV: %v", err)
output.failOutput([]string{fmt.Sprintf("failed to read CSV: %v", err)}, startTime)
return output, fmt.Errorf("failed to read CSV: %w", err)
}
logger.Log("Loaded %d locations from CSV", len(locations))
output.TotalLocations = len(locations)

// Phase 1.5: Validate all location_ids belong to the dataset
logger.Log("Validating location_ids belong to dataset...")
output.failOutput([]string{err.Error()}, startTime)
return output, err
}
logger.Log("Location validation complete")

// Phase 2: Create/Validate Clusters
logger.Log("=== Phase 1: Creating/Validating Clusters ===")
if err != nil {
logger.Log("ERROR: Failed to open database: %v", err)
output.failOutput([]string{fmt.Sprintf("failed to open database: %v", err)}, startTime)
return output, fmt.Errorf("failed to open database: %w", err)
}
defer database.Close()

clusterIDMap, created, existing, err := bulkCreateClusters(ctx, database, logger, locations, input.DatasetID)
if err != nil {
output.failOutput(output.Errors, startTime)
return output, err
}

logger.Log("=== Phase 2: Importing Files ===")
fileStats, errs := bulkImportAllFiles(database, logger, locations, clusterIDMap, input.DatasetID)
output.TotalFilesScanned = fileStats.TotalFiles
output.FilesImported = fileStats.ImportedFiles
output.FilesDuplicate = fileStats.DuplicateFiles
output.FilesError = fileStats.ErrorFiles
output.Errors = append(output.Errors, errs...)

if len(errs) > 0 {
output.ProcessingTime = time.Since(startTime).String()
return output, fmt.Errorf("failed to import files: %s", errs[0])
}

logger.Log("=== Import Complete ===")
logger.Log("Total files scanned: %d", fileStats.TotalFiles)
logger.Log("Files imported: %d", fileStats.ImportedFiles)
logger.Log("Duplicates skipped: %d", fileStats.DuplicateFiles)
logger.Log("Errors: %d", fileStats.ErrorFiles)
logger.Log("Processing time: %s", time.Since(startTime).Round(time.Second))

output.ProcessingTime = time.Since(startTime).String()

return output, nil
}

// bulkValidateInput validates input parameters
func bulkValidateInput(input BulkFileImportInput) error {
// Validate ID format first (fast fail before DB queries)
if err := utils.ValidateShortID(input.DatasetID, "dataset_id"); err != nil {
return err
}

// Verify CSV file exists
if _, err := os.Stat(input.CSVPath); err != nil {
return fmt.Errorf("CSV file not accessible: %w", err)
}

// Verify log file path is writable
logDir := filepath.Dir(input.LogFilePath)
if _, err := os.Stat(logDir); err != nil {
return fmt.Errorf("log file directory not accessible: %w", err)
}

// Open database for validation queries
if err != nil {
return fmt.Errorf("failed to open database: %w", err)
}
defer database.Close()

// Verify dataset exists and is structured
if err := db.ValidateDatasetTypeForImport(database, input.DatasetID); err != nil {
return err
}

return nil
}

// bulkValidateLocationsBelongToDataset checks every distinct location_id found
// in locations against datasetID using db.ValidateLocationBelongsToDataset.
//
// It returns one message per failing location_id, or nil when all pass. Each
// location_id is checked exactly once, in order of first appearance in
// locations, so the returned messages come back in the same order on every run
// (ranging over a map would shuffle them).
func bulkValidateLocationsBelongToDataset(dbConn *sql.DB, locations []bulkLocationData, datasetID string) []string {
	var problems []string

	// seen de-duplicates location_ids while preserving input order.
	seen := make(map[string]struct{}, len(locations))
	for _, loc := range locations {
		if _, dup := seen[loc.LocationID]; dup {
			continue
		}
		seen[loc.LocationID] = struct{}{}

		if err := db.ValidateLocationBelongsToDataset(dbConn, loc.LocationID, datasetID); err != nil {
			problems = append(problems, err.Error())
		}
	}

	return problems
}

var clusterID string
if err == sql.ErrNoRows {
clusterID, err = bulkCreateCluster(ctx, database, datasetID, loc.LocationID, loc.DateRange, loc.SampleRate)
if err != nil {
logger.Log("ERROR: Failed to create cluster for location %s: %v", loc.LocationName, err)
return nil, 0, 0, fmt.Errorf("failed to create cluster: %w", err)
}
logger.Log(" Created cluster: %s", clusterID)
created++
} else if err != nil {
logger.Log("ERROR: Failed to check cluster for location %s: %v", loc.LocationName, err)
return nil, 0, 0, fmt.Errorf("failed to check cluster: %w", err)
} else {
clusterID = existingClusterID
logger.Log(" Using existing cluster: %s", clusterID)
existing++
}

compositeKey := loc.LocationID + "|" + loc.DateRange
clusterIDMap[compositeKey] = clusterID
}

return clusterIDMap, created, existing, nil
}

// bulkImportAllFiles walks locations in order and imports the files of each one
// into the cluster recorded for it in clusterIDMap, which is keyed by
// "<location_id>|<date_range>".
//
// A location with no entry in the map, or whose directory does not exist, is
// skipped. The first location whose import returns an error ends the run: the
// totals gathered so far are returned with a single-element message slice.
// When every location is processed the message slice is nil.
func bulkImportAllFiles(database *sql.DB, logger *progressLogger, locations []bulkLocationData, clusterIDMap map[string]string, datasetID string) (bulkImportStats, []string) {
	var totals bulkImportStats
	count := len(locations)

	for idx := range locations {
		loc := locations[idx]

		clusterID, found := clusterIDMap[loc.LocationID+"|"+loc.DateRange]
		if !found {
			continue
		}

		logger.Log("[%d/%d] Importing files for: %s", idx+1, count, loc.LocationName)
		logger.Log(" Directory: %s", loc.DirectoryPath)

		_, statErr := os.Stat(loc.DirectoryPath)
		if os.IsNotExist(statErr) {
			logger.Log(" WARNING: Directory not found, skipping")
			continue
		}

		stats, importErr := bulkImportFilesForCluster(database, logger, loc.DirectoryPath, datasetID, loc.LocationID, clusterID)
		if importErr != nil {
			msg := fmt.Sprintf("Failed to import files for location %s: %v", loc.LocationName, importErr)
			logger.Log("ERROR: %s", msg)
			return totals, []string{msg}
		}

		logger.Log(" Scanned: %d files", stats.TotalFiles)
		logger.Log(" Imported: %d, Duplicates: %d", stats.ImportedFiles, stats.DuplicateFiles)
		if stats.ErrorFiles > 0 {
			logger.Log(" Errors: %d files", stats.ErrorFiles)
		}

		// Fold this location's counters into the running totals.
		totals.TotalFiles += stats.TotalFiles
		totals.ImportedFiles += stats.ImportedFiles
		totals.DuplicateFiles += stats.DuplicateFiles
		totals.ErrorFiles += stats.ErrorFiles
	}

	return totals, nil
}
// bulkReadCSV loads the bulk-import specification from the CSV file at path.
//
// The first record is treated as a header and skipped. Every following record
// must have at least six columns, read positionally as:
//
//	0 location_name, 1 location_id, 2 directory_path,
//	3 date_range, 4 sample_rate, 5 file_count
//
// Surrounding whitespace is trimmed from all six columns before validation, so
// a cell such as " 48000" or " <id> " is treated the same as its trimmed form.
// The first invalid row aborts the read; its error names the 1-based row number
// (header = row 1). A file holding only a header yields a nil slice and no error.
func bulkReadCSV(path string) ([]bulkLocationData, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// Read-only handle: a close failure cannot lose data, so it is ignored.
	defer func() { _ = file.Close() }()

	records, err := csv.NewReader(file).ReadAll()
	if err != nil {
		return nil, err
	}
	if len(records) == 0 {
		return nil, fmt.Errorf("CSV file is empty")
	}

	var locations []bulkLocationData
	for i, record := range records[1:] {
		row := i + 2 // 1-based row number in the file, counting the header

		if len(record) < 6 {
			return nil, fmt.Errorf("CSV row %d has insufficient columns (expected 6, got %d)", row, len(record))
		}

		// Required free-text fields must be non-empty once trimmed.
		locationName := strings.TrimSpace(record[0])
		if locationName == "" {
			return nil, fmt.Errorf("empty location_name in row %d", row)
		}
		directoryPath := strings.TrimSpace(record[2])
		if directoryPath == "" {
			return nil, fmt.Errorf("empty directory_path in row %d", row)
		}
		dateRange := strings.TrimSpace(record[3])
		if dateRange == "" {
			return nil, fmt.Errorf("empty date_range in row %d", row)
		}

		locationID := strings.TrimSpace(record[1])
		if err := utils.ValidateShortID(locationID, "location_id"); err != nil {
			return nil, fmt.Errorf("invalid location_id in row %d: %w", row, err)
		}

		sampleRate, err := strconv.Atoi(strings.TrimSpace(record[4]))
		if err != nil {
			return nil, fmt.Errorf("invalid sample_rate in row %d: %w", row, err)
		}
		if err := utils.ValidateSampleRate(sampleRate); err != nil {
			return nil, fmt.Errorf("invalid sample_rate in row %d: %w", row, err)
		}

		fileCount, err := strconv.Atoi(strings.TrimSpace(record[5]))
		if err != nil {
			return nil, fmt.Errorf("invalid file_count in row %d: %w", row, err)
		}

		locations = append(locations, bulkLocationData{
			LocationName:  locationName,
			LocationID:    locationID,
			DirectoryPath: directoryPath,
			DateRange:     dateRange,
			SampleRate:    sampleRate,
			FileCount:     fileCount,
		})
	}

	return locations, nil
}

// bulkCreateCluster inserts a new active cluster row for locationID in
// datasetID and returns the generated cluster ID.
//
// name is stored as the cluster's name. The cluster's path is derived from the
// location's name (looked up by locationID) with spaces and "/" replaced by
// "_". created_at and last_modified are both set to the current UTC time.
//
// The location lookup and the insert both honour ctx. The insert runs in its
// own logged transaction ("bulk_file_import") which is committed before
// returning; any failure is wrapped and returned with an empty ID.
func bulkCreateCluster(ctx context.Context, database *sql.DB, datasetID, locationID, name string, sampleRate int) (string, error) {
	clusterID, err := utils.GenerateShortID()
	if err != nil {
		return "", fmt.Errorf("failed to generate cluster ID: %w", err)
	}
	now := time.Now().UTC()

	// The location's name is the basis for the cluster path.
	var locationName string
	err = database.QueryRowContext(ctx, "SELECT name FROM location WHERE id = ?", locationID).Scan(&locationName)
	if err != nil {
		return "", fmt.Errorf("failed to get location name: %w", err)
	}

	// Normalize path: spaces and path separators become underscores.
	path := strings.NewReplacer(" ", "_", "/", "_").Replace(locationName)

	tx, err := db.BeginLoggedTx(ctx, database, "bulk_file_import")
	if err != nil {
		return "", fmt.Errorf("failed to begin transaction: %w", err)
	}
	// Safety net for the error returns below.
	// NOTE(review): presumably harmless once Commit has succeeded — confirm against LoggedTx.
	defer tx.Rollback()

	_, err = tx.ExecContext(ctx, `
INSERT INTO cluster (id, dataset_id, location_id, name, path, sample_rate, active, created_at, last_modified)
VALUES (?, ?, ?, ?, ?, ?, true, ?, ?)
`, clusterID, datasetID, locationID, name, path, sampleRate, now, now)
	if err != nil {
		return "", fmt.Errorf("failed to insert cluster: %w", err)
	}

	if err = tx.Commit(); err != nil {
		return "", fmt.Errorf("failed to commit cluster creation: %w", err)
	}

	return clusterID, nil
}

// bulkImportFilesForCluster imports all WAV files for a single cluster
func bulkImportFilesForCluster(database *sql.DB, logger *progressLogger, folderPath, datasetID, locationID, clusterID string) (*bulkImportStats, error) {
stats := &bulkImportStats{}

// Check if directory exists
if _, err := os.Stat(folderPath); os.IsNotExist(err) {
logger.Log(" WARNING: Directory not found, skipping")
return stats, nil
}

// Import the cluster (SAME LOGIC AS import_files.go)
logger.Log(" Importing cluster %s", clusterID)
FolderPath: folderPath,
DatasetID: datasetID,
LocationID: locationID,
ClusterID: clusterID,
Recursive: true,
})
if err != nil {
return nil, err
}

// Map to bulk import stats
stats.TotalFiles = clusterOutput.TotalFiles
stats.ImportedFiles = clusterOutput.ImportedFiles
stats.DuplicateFiles = clusterOutput.SkippedFiles
stats.ErrorFiles = clusterOutput.FailedFiles

// Log errors
for i, fileErr := range clusterOutput.Errors {
if i < 5 { // Log first 5
logger.Log(" ERROR: %s: %s", fileErr.FileName, fileErr.Error)
}
}

logger.Log(" Complete: %d imported, %d duplicates, %d errors", stats.ImportedFiles, stats.DuplicateFiles, stats.ErrorFiles)

return stats, nil
}
if err := tx.Commit(); err != nil {
return nil, fmt.Errorf("transaction commit failed: %w", err)
}

tx.Rollback()

ctx := context.Background()
tx, err := db.BeginLoggedTx(ctx, database, "import_audio_files")
if err != nil {
return nil, fmt.Errorf("failed to begin transaction: %w", err)
}

clusterOutput, err := utils.ImportCluster(database, tx.UnderlyingTx(), utils.ClusterImportInput{

// bulkCreateClusters creates or validates clusters for all locations.
// Returns the cluster ID map, counts of created/existing clusters, and any error.
func bulkCreateClusters(ctx context.Context, database *sql.DB, logger *progressLogger, locations []bulkLocationData, datasetID string) (map[string]string, int, int, error) {
clusterIDMap := make(map[string]string)
created := 0
existing := 0

for i, loc := range locations {
logger.Log("[%d/%d] Processing location: %s", i+1, len(locations), loc.LocationName)

var existingClusterID string
err := database.QueryRow(`
SELECT id FROM cluster
WHERE location_id = ? AND name = ? AND active = true
`, loc.LocationID, loc.DateRange).Scan(&existingClusterID)
}

// bulkValidateLocations validates that all location_ids in the CSV belong to the dataset.
// Returns an error if validation fails.
readDB, err := db.OpenReadOnlyDB(dbPath)
if err != nil {
logger.Log("ERROR: Failed to open database: %v", err)
return fmt.Errorf("failed to open database: %w", err)
}

locationErrors := bulkValidateLocationsBelongToDataset(readDB, locations, datasetID)
readDB.Close()

if len(locationErrors) > 0 {
for _, locErr := range locationErrors {
logger.Log("ERROR: %s", locErr)
}
return fmt.Errorf("location validation failed: %d location(s) do not belong to dataset %s", len(locationErrors), datasetID)
}
return nil
func bulkValidateLocations(logger *progressLogger, locations []bulkLocationData, datasetID string, dbPath string) error {
database, err := db.OpenReadOnlyDB(resolveDBPath(input.DBPath))
// Phase 3: Import files
output.ClustersCreated = created
output.ClustersExisting = existing
database, err := db.OpenWriteableDB(resolveDBPath(input.DBPath))
if err := bulkValidateLocations(logger, locations, input.DatasetID, resolveDBPath(input.DBPath)); err != nil {
}

// BulkFileImport imports WAV files across multiple locations using CSV specification
// failOutput sets error details and processing time on the output before returning.
func (o *BulkFileImportOutput) failOutput(errs []string, startTime time.Time) {
o.Errors = errs
o.ProcessingTime = time.Since(startTime).String()
DBPath string `json:"db_path"`
edit in tools/update_test.go at line 65

[6.250266]→[6.250266:250286](∅→∅)

SetDBPath(dbPath)
edit in tools/update_test.go at line 71

[6.250436]

[6.250436]

DBPath: dbPath,
edit in tools/update_test.go at line 97

[6.251252]

[6.251252]

DBPath: dbPath,
edit in tools/update_test.go at line 125

[6.252154]→[6.252154:252174](∅→∅)

SetDBPath(dbPath)
replacement in tools/update_test.go at line 127

[6.252227]→[6.252227:252319](∅→∅)

dsCreated, err := CreateOrUpdateDataset(context.Background(), DatasetInput{Name: &dsName})

[6.252227]

[6.252319]

dsCreated, err := CreateOrUpdateDataset(context.Background(), DatasetInput{DBPath: dbPath, Name: &dsName})
edit in tools/update_test.go at line 139

[6.252577]

[6.252577]

DBPath: dbPath,
edit in tools/update_test.go at line 165

[6.253317]

[6.253317]

DBPath: dbPath,
edit in tools/update_test.go at line 199

[6.254530]→[6.254530:254550](∅→∅)

SetDBPath(dbPath)
replacement in tools/update_test.go at line 201

[6.254608]→[6.254608:254700](∅→∅)

dsCreated, err := CreateOrUpdateDataset(context.Background(), DatasetInput{Name: &dsName})

[6.254608]

[6.254700]

dsCreated, err := CreateOrUpdateDataset(context.Background(), DatasetInput{DBPath: dbPath, Name: &dsName})
edit in tools/update_test.go at line 210

[6.254931]

[6.254931]

DBPath: dbPath,
edit in tools/update_test.go at line 226

[6.255277]

[6.255277]

DBPath: dbPath,
edit in tools/update_test.go at line 243

[6.255717]

[6.255717]

DBPath: dbPath,
edit in tools/update_test.go at line 271

[6.256635]→[6.256635:256655](∅→∅)

SetDBPath(dbPath)
edit in tools/update_test.go at line 275

[6.256749]

[6.256749]

DBPath: dbPath,
edit in tools/update_test.go at line 297

[6.257297]

[6.257297]

DBPath: dbPath,
edit in tools/update_test.go at line 321

[6.257999]→[6.257999:258019](∅→∅)

SetDBPath(dbPath)
replacement in tools/update_test.go at line 324

[6.258065]→[6.258065:258153](∅→∅)

created, err := CreateOrUpdateDataset(context.Background(), DatasetInput{Name: &name})

[6.258065]

[6.258153]

created, err := CreateOrUpdateDataset(context.Background(), DatasetInput{DBPath: dbPath, Name: &name})
replacement in tools/update_test.go at line 331

[6.258291]→[6.258291:258318](∅→∅)

ID: &created.Dataset.ID,

[6.258291]

[6.258318]

DBPath: dbPath,
ID: &created.Dataset.ID,
edit in tools/sql.go at line 14

[6.259494]→[6.259494:259544](∅→∅),[6.259544]→[4.55:144](∅→∅),[4.144]→[6.259544:259621](∅→∅),[6.259544]→[6.259544:259621](∅→∅),[6.259621]→[4.145:234](∅→∅),[4.234]→[6.259666:259711](∅→∅),[6.259666]→[6.259666:259711](∅→∅),[6.259711]→[4.235:237](∅→∅)

// Package-level variable to store database path
// Deprecated: use Input.DBPath instead. Will be removed after all callers are migrated.
var dbPath string

// SetDBPath sets the database path for the tools package
// Deprecated: use Input.DBPath instead. Will be removed after all callers are migrated.
func SetDBPath(path string) {
dbPath = path
}
replacement in tools/sql.go at line 15

[4.238]→[4.238:444](∅→∅)

// resolveDBPath returns the DBPath from the input if set, otherwise falls back
// to the package-level dbPath. This supports the incremental migration from
// the global variable to explicit input fields.

[4.238]

[4.444]

// resolveDBPath returns the DBPath from the input if set, otherwise returns
// the empty string. Callers that need a fallback should use db.ResolveDBPath.
replacement in tools/sql.go at line 18

[4.490]→[4.490:549](∅→∅)

if inputPath != "" {
return inputPath
}
return dbPath

[4.490]

[6.259711]

return db.ResolveDBPath(inputPath, "")
edit in tools/pattern_test.go at line 16

[6.276774]→[6.276774:276794](∅→∅)

SetDBPath(testDB)
edit in tools/pattern_test.go at line 24

[6.277050]

[6.277050]

DBPath: testDB,
edit in tools/pattern_test.go at line 58

[6.277928]

[6.277928]

DBPath: testDB,
replacement in tools/pattern_test.go at line 82

[6.278573]→[6.278573:278626](∅→∅)

output2, err2 := CreateOrUpdatePattern(ctx, input)

[6.278573]

[6.278626]

input2 := PatternInput{
DBPath: testDB,
RecordSeconds: &record,
SleepSeconds: &sleep,
}

output2, err2 := CreateOrUpdatePattern(ctx, input2)
edit in tools/pattern_test.go at line 106

[6.279169]→[6.279169:279189](∅→∅)

SetDBPath(testDB)
edit in tools/pattern_test.go at line 125

[6.279681]

[6.279681]

DBPath: testDB,
edit in tools/pattern_test.go at line 143

[6.280148]→[6.280148:280168](∅→∅)

SetDBPath(testDB)
edit in tools/pattern_test.go at line 149

[6.280318]

[6.280318]

DBPath: testDB,
replacement in tools/pattern_test.go at line 163

[6.280594]→[6.280594:280606](∅→∅)

ID: &id,

[6.280594]

[6.280606]

DBPath: testDB,
ID: &id,
edit in tools/integration_test.go at line 15

[6.304268]→[6.304268:304287](∅→∅)

SetDBPath(testDB)
replacement in tools/integration_test.go at line 18

[6.304420]→[6.3948:3984](∅→∅)

testQueryExistingPatterns(t, ctx)

[6.304420]

[6.304955]

testQueryExistingPatterns(t, ctx, testDB)
replacement in tools/integration_test.go at line 22

[6.305071]→[6.3985:4024](∅→∅)

testCreateClusterWithPattern(t, ctx)

[6.305071]

[6.4024]

testCreateClusterWithPattern(t, ctx, testDB)
replacement in tools/integration_test.go at line 26

[6.305432]→[6.4031:4099](∅→∅)

func testQueryExistingPatterns(t *testing.T, ctx context.Context) {

[6.305432]

[6.4099]

func testQueryExistingPatterns(t *testing.T, ctx context.Context, testDB string) {
replacement in tools/integration_test.go at line 29

[6.4138]→[6.4138:4256](∅→∅)

Query: "SELECT id, record_s, sleep_s FROM cyclic_recording_pattern WHERE active = true ORDER BY record_s, sleep_s",

[6.4138]

[6.4256]

DBPath: testDB,
Query: "SELECT id, record_s, sleep_s FROM cyclic_recording_pattern WHERE active = true ORDER BY record_s, sleep_s",
replacement in tools/integration_test.go at line 48

[6.306843]→[6.4635:4706](∅→∅)

func testCreateClusterWithPattern(t *testing.T, ctx context.Context) {

[6.306843]

[6.4706]

func testCreateClusterWithPattern(t *testing.T, ctx context.Context, testDB string) {
replacement in tools/integration_test.go at line 52

[6.4799]→[6.4799:4862](∅→∅)

Query: "SELECT id FROM dataset WHERE active = true LIMIT 1",

[6.4799]

[6.4862]

DBPath: testDB,
Query: "SELECT id FROM dataset WHERE active = true LIMIT 1",
edit in tools/integration_test.go at line 62

[6.5107]

[6.5107]

DBPath: testDB,
edit in tools/integration_test.go at line 75

[6.5538]

[6.5538]

DBPath: testDB,
edit in tools/integration_test.go at line 90

[6.6041]

[6.6041]

DBPath: testDB,
file addition: import (d--r------)

[6.248737]
file addition: import_unstructured.go (----------)

[0.1]

package imp

import (
"context"
"database/sql"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"time"

"skraak/db"
"skraak/utils"
)

// ImportUnstructuredInput defines the input parameters for importing files into an unstructured dataset.
type ImportUnstructuredInput struct {
// DBPath is the database path; it is passed through db.ResolveDBPath before use.
DBPath string `json:"db_path"`
// DatasetID is the target dataset. It must pass utils.ValidateShortID and
// refer to an active dataset of the unstructured type.
DatasetID string `json:"dataset_id"`
// FolderPath is the directory scanned for .wav files; it must exist and be a directory.
FolderPath string `json:"folder_path"`
// Recursive selects whether sub-directories are scanned. Nil means true.
Recursive *bool `json:"recursive,omitempty"`
}

// ImportUnstructuredOutput reports the outcome of an ImportUnstructured run.
type ImportUnstructuredOutput struct {
// TotalFiles is the number of .wav files found by the folder scan.
TotalFiles int `json:"total_files"`
// ImportedFiles counts files that were processed and not skipped.
ImportedFiles int `json:"imported_files"`
SkippedFiles int `json:"skipped_files"` // Duplicates
// FailedFiles counts files whose processing returned an error.
FailedFiles int `json:"failed_files"`
// TotalDuration is the summed duration, in seconds, of the imported (non-skipped) files.
TotalDuration float64 `json:"total_duration_seconds"`
// ProcessingTime is the elapsed wall-clock time formatted by time.Duration.String.
ProcessingTime string `json:"processing_time"`
// Errors collects scan-stage and per-file processing errors.
Errors []utils.FileImportError `json:"errors,omitempty"`
}

// ImportUnstructured imports WAV files into an unstructured dataset.
//
// Files are stored with minimal metadata: hash, duration, sample_rate and the
// file modification time as timestamp. No location/cluster hierarchy, no
// astronomical data and no AudioMoth parsing are involved.
//
// Flow: validate the input, scan input.FolderPath for .wav files (recursively
// unless input.Recursive points to false), then process every file inside one
// write transaction named "import_unstructured". A file that fails is counted
// in FailedFiles and recorded in Errors without stopping the run; a duplicate
// is counted in SkippedFiles.
//
// ctx is checked before each file; once it is done the callback returns an
// error and no further files are processed.
// NOTE(review): this assumes db.WithWriteTx rolls back when the callback
// returns an error — confirm. The counters in the returned output then
// describe work that was attempted, not work that was committed.
//
// ProcessingTime is filled in on every return after validation, including the
// transaction-failure path.
func ImportUnstructured(
	ctx context.Context,
	input ImportUnstructuredInput,
) (ImportUnstructuredOutput, error) {
	startTime := time.Now()
	var output ImportUnstructuredOutput

	// Default recursive to true.
	recursive := true
	if input.Recursive != nil {
		recursive = *input.Recursive
	}

	if err := validateUnstructuredInput(input); err != nil {
		return output, fmt.Errorf("validation failed: %w", err)
	}

	// Scan for WAV files (no DB needed).
	files, scanErrors := scanWavFiles(input.FolderPath, recursive)
	output.Errors = append(output.Errors, scanErrors...)
	output.TotalFiles = len(files)

	if len(files) == 0 {
		output.ProcessingTime = time.Since(startTime).String()
		return output, nil
	}

	err := db.WithWriteTx(ctx, db.ResolveDBPath(input.DBPath, ""), "import_unstructured", func(_ *sql.DB, tx *db.LoggedTx) error {
		for _, filePath := range files {
			// Stop promptly when the caller cancels or the deadline passes.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return fmt.Errorf("import cancelled: %w", ctxErr)
			}

			fileResult, procErr := processUnstructuredFile(tx, filePath, input.DatasetID)
			if procErr != nil {
				// Per-file failures are recorded; the remaining files are still attempted.
				output.FailedFiles++
				output.Errors = append(output.Errors, utils.FileImportError{
					FileName: filepath.Base(filePath),
					Error:    procErr.Error(),
					Stage:    utils.StageProcess,
				})
				continue
			}

			if fileResult.Skipped {
				output.SkippedFiles++
				continue
			}
			output.ImportedFiles++
			output.TotalDuration += fileResult.Duration
		}
		return nil
	})

	output.ProcessingTime = time.Since(startTime).String()
	if err != nil {
		return output, err
	}
	return output, nil
}

// unstructuredFileResult holds the result of processing a single file.
// Duration is populated for skipped duplicates as well as for imported files.
type unstructuredFileResult struct {
Skipped bool // True if duplicate
Duration float64 // Duration in seconds
}

// processUnstructuredFile imports one WAV file into datasetID through tx.
//
// The file's header is parsed and its XXH64 hash computed. If the hash is
// already known the file is reported as skipped and nothing is written, so an
// existing file is never linked to this dataset. Otherwise a row is inserted
// into file (timestamp_local = the file's modification time, with location,
// cluster and night/moon columns NULL) followed by a file_dataset link row.
//
// A nil result is returned together with any error.
func processUnstructuredFile(tx *db.LoggedTx, filePath, datasetID string) (*unstructuredFileResult, error) {
	const insertFileSQL = `
INSERT INTO file (
id, file_name, xxh64_hash, location_id, cluster_id,
timestamp_local, duration, sample_rate,
maybe_solar_night, maybe_civil_night, moon_phase,
active
) VALUES (?, ?, ?, NULL, NULL, ?, ?, ?, NULL, NULL, NULL, TRUE)
`
	const linkDatasetSQL = "INSERT INTO file_dataset (file_id, dataset_id) VALUES (?, ?)"

	wav, err := utils.ParseWAVHeader(filePath)
	if err != nil {
		return nil, fmt.Errorf("WAV header parsing failed: %w", err)
	}

	digest, err := utils.ComputeXXH64(filePath)
	if err != nil {
		return nil, fmt.Errorf("hash calculation failed: %w", err)
	}

	_, alreadyStored, err := utils.CheckDuplicateHash(tx, digest)
	if err != nil {
		return nil, fmt.Errorf("duplicate check failed: %w", err)
	}
	if alreadyStored {
		// Known file: skip completely, do not link it to this dataset.
		return &unstructuredFileResult{Skipped: true, Duration: wav.Duration}, nil
	}

	newID, err := utils.GenerateLongID()
	if err != nil {
		return nil, fmt.Errorf("ID generation failed: %w", err)
	}

	// The modification time is stored as-is (no timezone conversion).
	if _, err = tx.Exec(insertFileSQL,
		newID,
		filepath.Base(filePath),
		digest,
		wav.FileModTime,
		wav.Duration,
		wav.SampleRate,
	); err != nil {
		return nil, fmt.Errorf("file insert failed: %w", err)
	}

	if _, err = tx.Exec(linkDatasetSQL, newID, datasetID); err != nil {
		return nil, fmt.Errorf("file_dataset insert failed: %w", err)
	}

	return &unstructuredFileResult{Duration: wav.Duration}, nil
}

// validateUnstructuredInput checks an ImportUnstructuredInput before any file
// is touched. In order: the dataset ID format, that FolderPath exists and is a
// directory, and (through a read-only database handle) that the dataset exists,
// is active, and is of the unstructured type. The first failure is returned.
func validateUnstructuredInput(input ImportUnstructuredInput) error {
	if idErr := utils.ValidateShortID(input.DatasetID, "dataset_id"); idErr != nil {
		return idErr
	}

	folder, statErr := os.Stat(input.FolderPath)
	switch {
	case statErr != nil:
		return fmt.Errorf("folder not accessible: %w", statErr)
	case !folder.IsDir():
		return fmt.Errorf("path is not a directory: %s", input.FolderPath)
	}

	// Database-backed checks run last because they are the most expensive.
	checkDataset := func(conn *sql.DB) error {
		if _, err := db.DatasetExistsAndActive(conn, input.DatasetID); err != nil {
			return err
		}
		if err := db.ValidateDatasetTypeUnstructured(conn, input.DatasetID); err != nil {
			return err
		}
		return nil
	}

	return db.WithReadDB(db.ResolveDBPath(input.DBPath, ""), checkDataset)
}

// scanWavFiles collects the paths of files under folderPath whose name ends in
// ".wav" (case-insensitive).
//
// With recursive set, the whole tree is walked; an entry that cannot be read is
// recorded as a scan-stage error and the walk continues. Without it, only the
// direct children of folderPath are listed; if the folder itself cannot be
// read, a nil file list is returned with that single error.
//
// Returned paths are rooted at folderPath.
func scanWavFiles(folderPath string, recursive bool) ([]string, []utils.FileImportError) {
	var files []string
	var scanErrs []utils.FileImportError

	// fail records a scan-stage error against name.
	fail := func(name string, err error) {
		scanErrs = append(scanErrs, utils.FileImportError{
			FileName: name,
			Error:    err.Error(),
			Stage:    utils.StageScan,
		})
	}
	isWav := func(name string) bool {
		return strings.HasSuffix(strings.ToLower(name), ".wav")
	}

	if !recursive {
		// Non-recursive: only scan top-level entries.
		entries, err := os.ReadDir(folderPath)
		if err != nil {
			fail(folderPath, err)
			return nil, scanErrs
		}
		for _, entry := range entries {
			if !entry.IsDir() && isWav(entry.Name()) {
				files = append(files, filepath.Join(folderPath, entry.Name()))
			}
		}
		return files, scanErrs
	}

	walkErr := filepath.WalkDir(folderPath, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// Record the problem and keep walking the rest of the tree.
			fail(path, err)
			return nil
		}
		if !d.IsDir() && isWav(d.Name()) {
			files = append(files, path)
		}
		return nil
	})
	if walkErr != nil {
		fail(folderPath, walkErr)
	}

	return files, scanErrs
}
file addition: import_segments_test.go (----------)

[0.1]

package imp

import (
"testing"

"skraak/utils"
)

// TestValidateSegmentImportInput feeds validateSegmentImportInput a series of
// inputs with a malformed dataset, location or cluster ID and expects each one
// to be rejected.
func TestValidateSegmentImportInput(t *testing.T) {
	cases := []struct {
		name    string
		input   ImportSegmentsInput
		failMsg string // reported when validation unexpectedly succeeds
	}{
		{
			name:    "invalid dataset ID - too short",
			input:   ImportSegmentsInput{DatasetID: "abc"},
			failMsg: "expected error for short dataset ID",
		},
		{
			name:    "invalid dataset ID - too long",
			input:   ImportSegmentsInput{DatasetID: "abc123def456ghi789"},
			failMsg: "expected error for long dataset ID",
		},
		{
			name:    "invalid dataset ID - invalid characters",
			input:   ImportSegmentsInput{DatasetID: "abc123!!!456"},
			failMsg: "expected error for invalid characters in dataset ID",
		},
		{
			name: "invalid location ID",
			input: ImportSegmentsInput{
				DatasetID:  "abc123def456",
				LocationID: "invalid",
			},
			failMsg: "expected error for invalid location ID",
		},
		{
			name: "invalid cluster ID",
			input: ImportSegmentsInput{
				DatasetID:  "abc123def456",
				LocationID: "xyz789uvw012",
				ClusterID:  "invalid",
			},
			failMsg: "expected error for invalid cluster ID",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if err := validateSegmentImportInput(tc.input); err == nil {
				t.Fatal(tc.failMsg)
			}
		})
	}
}

func TestCountTotalSegments(t *testing.T) {
t.Run("empty", func(t *testing.T) {
count := countTotalSegments(map[string]scannedDataFile{})
if count != 0 {
t.Errorf("expected 0, got %d", count)
}
})

t.Run("single file - no segments", func(t *testing.T) {
files := map[string]scannedDataFile{
"file1": {Segments: []*utils.Segment{}},
}
count := countTotalSegments(files)
if count != 0 {
t.Errorf("expected 0, got %d", count)
}
})

t.Run("single file - multiple segments", func(t *testing.T) {
files := map[string]scannedDataFile{
"file1": {Segments: []*utils.Segment{{}, {}, {}}},
}
count := countTotalSegments(files)
if count != 3 {
t.Errorf("expected 3, got %d", count)
}
})

t.Run("multiple files", func(t *testing.T) {
files := map[string]scannedDataFile{
"file1": {Segments: []*utils.Segment{{}, {}}},
"file2": {Segments: []*utils.Segment{{}}},
"file3": {Segments: []*utils.Segment{{}, {}, {}, {}}},
}
count := countTotalSegments(files)
if count != 7 {
t.Errorf("expected 7, got %d", count)
}
})
}
file addition: import_segments.go (----------)

[0.1]

package imp

import (
"context"
"database/sql"
"fmt"
"os"
"path/filepath"
"strings"
"time"

"skraak/db"
"skraak/utils"
)

// ImportSegmentsInput defines the input parameters for the import_segments tool.
type ImportSegmentsInput struct {
// DBPath is the database path; it is passed through db.ResolveDBPath before use.
DBPath string `json:"db_path"`
// Folder is the directory searched for .data files.
Folder string `json:"folder"`
// Mapping is the path of the mapping file loaded with utils.LoadMappingFile.
Mapping string `json:"mapping"`
// DatasetID, LocationID and ClusterID identify the hierarchy the segments
// are validated against and imported into.
DatasetID string `json:"dataset_id"`
LocationID string `json:"location_id"`
ClusterID string `json:"cluster_id"`
// ProgressHandler has no JSON tag.
// NOTE(review): presumably an optional progress callback; its call sites are not visible here — confirm nil handling.
ProgressHandler func(processed, total int, message string)
}

// ImportSegmentsOutput defines the output structure for the import_segments tool:
// summary counters, the imported segments, and any errors collected on the way.
type ImportSegmentsOutput struct {
Summary ImportSegmentsSummary `json:"summary"`
Segments []SegmentImport `json:"segments"`
// Errors is omitted from JSON when empty.
Errors []ImportSegmentError `json:"errors,omitempty"`
}

// ImportSegmentsSummary provides summary statistics for the import operation.
type ImportSegmentsSummary struct {
// DataFilesFound is the number of .data files discovered in the input folder.
DataFilesFound int `json:"data_files_found"`
DataFilesProcessed int `json:"data_files_processed"`
TotalSegments int `json:"total_segments"`
ImportedSegments int `json:"imported_segments"`
ImportedLabels int `json:"imported_labels"`
ImportedSubtypes int `json:"imported_subtypes"`
// ProcessingTimeMs is the elapsed time in milliseconds (per the field name and JSON key).
ProcessingTimeMs int64 `json:"processing_time_ms"`
}

// SegmentImport represents an imported segment in the output: its ID, the file
// it belongs to, its time and frequency bounds, and the labels attached to it.
// NOTE(review): units of the time/frequency fields are not established here — presumably seconds and Hz; confirm.
type SegmentImport struct {
SegmentID string `json:"segment_id"`
FileName string `json:"file_name"`
StartTime float64 `json:"start_time"`
EndTime float64 `json:"end_time"`
FreqLow float64 `json:"freq_low"`
FreqHigh float64 `json:"freq_high"`
Labels []LabelImport `json:"labels"`
}

// LabelImport represents an imported label in the output.
// CallType and Comment are omitted from JSON when empty.
type LabelImport struct {
LabelID string `json:"label_id"`
Species string `json:"species"`
CallType string `json:"calltype,omitempty"`
Filter string `json:"filter"`
Certainty int `json:"certainty"`
Comment string `json:"comment,omitempty"`
}

// ImportSegmentError records errors encountered during segment import.
type ImportSegmentError struct {
// File names the file the error relates to; omitted from JSON when empty.
File string `json:"file,omitempty"`
// Stage is the import stage at which the error occurred.
Stage utils.ImportStage `json:"stage"`
Message string `json:"message"`
}

// scannedDataFile holds parsed data for a .data file: the paths of the .data
// file and of its WAV file, identifying values for that WAV file, and the
// segments read from the .data file.
type scannedDataFile struct {
DataPath string
WavPath string
WavHash string
FileID string
Duration float64
Segments []*utils.Segment
}

// segmentValidation holds the results of pre-import validation (phases B+C):
// the parsed .data files plus the lookup maps built while validating filters,
// species, call types and files against the database.
type segmentValidation struct {
scannedFiles []scannedDataFile
filterIDMap map[string]string
speciesIDMap map[string]string
// calltypeIDMap is a two-level lookup.
// NOTE(review): presumably species -> call type -> ID; confirm against loadSpeciesCalltypeIDs.
calltypeIDMap map[string]map[string]string
fileIDMap map[string]scannedDataFile
}

// validateAndPrepareSegments runs phases B and C of the segment import: it
// parses the given .data files, checks the database state they depend on, and
// builds the ID lookup maps needed for the insert phase.
//
// Return contract:
//   - nothing could be parsed: (nil, parse errors, nil);
//   - a hierarchy, filter, mapping or ID-loading check fails:
//     (nil, parse errors, error);
//   - otherwise: the populated validation result, with the parse errors
//     followed by the per-file validation errors, and a nil error.
func validateAndPrepareSegments(
	database *sql.DB,
	input ImportSegmentsInput,
	mapping utils.MappingFile,
	dataFiles []string,
) (*segmentValidation, []ImportSegmentError, error) {
	// Phase B: parse every .data file and gather the distinct values they use.
	parsed, problems, filters, species, calltypes := scanAllDataFiles(dataFiles, input.Folder)
	if len(parsed) == 0 {
		return nil, problems, nil
	}

	// Phase C: database-side validation, cheapest checks first.
	if hierErr := validateSegmentHierarchy(database, input.DatasetID, input.LocationID, input.ClusterID); hierErr != nil {
		return nil, problems, hierErr
	}

	filterIDs, err := validateFiltersExist(database, filters)
	if err != nil {
		return nil, problems, fmt.Errorf("filter validation failed: %w", err)
	}

	report, err := utils.ValidateMappingAgainstDB(database, mapping, species, calltypes)
	switch {
	case err != nil:
		return nil, problems, fmt.Errorf("mapping validation failed: %w", err)
	case report.HasErrors():
		return nil, problems, fmt.Errorf("mapping validation failed: %s", report.Error())
	}

	speciesIDs, calltypeIDs, err := loadSpeciesCalltypeIDs(database, mapping, species, calltypes)
	if err != nil {
		return nil, problems, fmt.Errorf("failed to load species/calltype IDs: %w", err)
	}

	// Per-file checks yield the usable files plus one error per rejected file.
	fileIDs, fileProblems := validateAndMapFiles(database, parsed, input.ClusterID, input.DatasetID)
	problems = append(problems, fileProblems...)

	prepared := &segmentValidation{
		scannedFiles:  parsed,
		filterIDMap:   filterIDs,
		speciesIDMap:  speciesIDs,
		calltypeIDMap: calltypeIDs,
		fileIDMap:     fileIDs,
	}
	return prepared, problems, nil
}

// ImportSegments imports segments from AviaNZ .data files into the database.
//
// The run proceeds in phases: input validation (A), parsing and database
// validation (B+C), a single transactional import (D), and finally writing
// the generated IDs back into the .data files (E). Per-file problems are
// reported in the output's Errors list; the returned error is reserved for
// failures that stop the run.
func ImportSegments(ctx context.Context, input ImportSegmentsInput) (ImportSegmentsOutput, error) {
	began := time.Now()
	output := ImportSegmentsOutput{
		Segments: make([]SegmentImport, 0),
		Errors:   make([]ImportSegmentError, 0),
	}

	// Phase A: check the raw input before touching any files or the database.
	if err := validateSegmentImportInput(input); err != nil {
		return output, err
	}

	mapping, err := utils.LoadMappingFile(input.Mapping)
	if err != nil {
		return output, fmt.Errorf("failed to load mapping file: %w", err)
	}

	dataFiles, err := utils.FindDataFiles(input.Folder)
	if err != nil {
		return output, fmt.Errorf("failed to find .data files: %w", err)
	}
	output.Summary.DataFilesFound = len(dataFiles)
	if len(dataFiles) == 0 {
		return output, fmt.Errorf("no .data files found in folder: %s", input.Folder)
	}

	// Phases B+C: parse the data files and validate them against the database.
	database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
	if err != nil {
		return output, fmt.Errorf("failed to open database: %w", err)
	}
	defer database.Close()

	validated, validationErrs, err := validateAndPrepareSegments(database, input, mapping, dataFiles)
	output.Errors = append(output.Errors, validationErrs...)
	if err != nil {
		return output, err
	}
	if validated == nil || len(validated.fileIDMap) == 0 {
		// Nothing survived validation; report the collected errors only.
		output.Summary.ProcessingTimeMs = time.Since(began).Milliseconds()
		return output, nil
	}

	// Phase D: insert everything inside one transaction.
	segments, labelCount, subtypeCount, updates, importErrs := importSegmentsIntoDB(
		ctx, database,
		validated.fileIDMap, validated.scannedFiles, mapping,
		validated.filterIDMap, validated.speciesIDMap, validated.calltypeIDMap,
		input.DatasetID, input.ProgressHandler,
	)
	output.Errors = append(output.Errors, importErrs...)
	output.Segments = append(output.Segments, segments...)

	// Phase E: record the generated IDs in the .data files.
	if len(updates) > 0 {
		output.Errors = append(output.Errors, writeIDsToDataFiles(updates)...)
	}

	summary := &output.Summary
	summary.DataFilesProcessed = len(validated.fileIDMap)
	summary.TotalSegments = countTotalSegments(validated.fileIDMap)
	summary.ImportedSegments = len(segments)
	summary.ImportedLabels = labelCount
	summary.ImportedSubtypes = subtypeCount
	summary.ProcessingTimeMs = time.Since(began).Milliseconds()

	return output, nil
}

// validateSegmentImportInput checks the caller-supplied input before any
// work is done: the folder must be an existing directory, the mapping file
// must exist, and the dataset, location and cluster IDs must pass
// utils.ValidateShortID. The first failing check is returned.
func validateSegmentImportInput(input ImportSegmentsInput) error {
	folderInfo, statErr := os.Stat(input.Folder)
	if statErr != nil {
		return fmt.Errorf("folder does not exist: %s", input.Folder)
	}
	if !folderInfo.IsDir() {
		return fmt.Errorf("path is not a folder: %s", input.Folder)
	}

	if _, statErr = os.Stat(input.Mapping); statErr != nil {
		return fmt.Errorf("mapping file does not exist: %s", input.Mapping)
	}

	// ID checks run in dataset, location, cluster order.
	idChecks := [...]struct{ value, field string }{
		{input.DatasetID, "dataset_id"},
		{input.LocationID, "location_id"},
		{input.ClusterID, "cluster_id"},
	}
	for _, check := range idChecks {
		if err := utils.ValidateShortID(check.value, check.field); err != nil {
			return err
		}
	}

	return nil
}

// validateSegmentHierarchy checks the dataset -> location -> cluster chain
// against the database, in that order, and returns the first failure:
// the dataset must be acceptable for import, the location must belong to the
// dataset, and the cluster must belong to the location.
func validateSegmentHierarchy(dbConn *sql.DB, datasetID, locationID, clusterID string) error {
	if err := db.ValidateDatasetTypeForImport(dbConn, datasetID); err != nil {
		return err
	}
	if err := db.ValidateLocationBelongsToDataset(dbConn, locationID, datasetID); err != nil {
		return err
	}
	// Last link in the chain: its result is the overall result.
	return db.ClusterBelongsToLocation(dbConn, clusterID, locationID)
}

// scanAllDataFiles parses each .data file and gathers the distinct filter,
// species and calltype names that appear in their labels.
//
// A .data file is skipped, with an error recorded, when the WAV file named by
// stripping the ".data" suffix cannot be stat'ed or when the .data file fails
// to parse. The folder argument is currently unused.
//
// Returns, in order: the successfully scanned files, the per-file errors, the
// set of filters, the set of species, and the calltypes keyed by species.
func scanAllDataFiles(dataFiles []string, folder string) (
	[]scannedDataFile,
	[]ImportSegmentError,
	map[string]bool,
	map[string]bool,
	map[string]map[string]bool,
) {
	var (
		parsed   []scannedDataFile
		problems []ImportSegmentError
	)
	filters := map[string]bool{}
	species := map[string]bool{}
	calltypes := map[string]map[string]bool{} // species -> calltype -> true

	// reject records a validation-stage error against the given .data file.
	reject := func(dataPath, message string) {
		problems = append(problems, ImportSegmentError{
			File:    filepath.Base(dataPath),
			Stage:   utils.StageValidation,
			Message: message,
		})
	}

	for _, dataPath := range dataFiles {
		// The WAV file sits next to the .data file, without the suffix.
		wavPath := strings.TrimSuffix(dataPath, ".data")
		if _, statErr := os.Stat(wavPath); statErr != nil {
			reject(dataPath, fmt.Sprintf("corresponding WAV file not found: %s", filepath.Base(wavPath)))
			continue
		}

		df, parseErr := utils.ParseDataFile(dataPath)
		if parseErr != nil {
			reject(dataPath, fmt.Sprintf("failed to parse .data file: %v", parseErr))
			continue
		}

		for _, seg := range df.Segments {
			for _, label := range seg.Labels {
				filters[label.Filter] = true
				species[label.Species] = true
				if label.CallType == "" {
					continue
				}
				perSpecies := calltypes[label.Species]
				if perSpecies == nil {
					perSpecies = make(map[string]bool)
					calltypes[label.Species] = perSpecies
				}
				perSpecies[label.CallType] = true
			}
		}

		parsed = append(parsed, scannedDataFile{
			DataPath: dataPath,
			WavPath:  wavPath,
			Duration: df.Meta.Duration,
			Segments: df.Segments,
		})
	}

	return parsed, problems, filters, species, calltypes
}

// validateFiltersExist checks that every name in filterNames matches an
// active row in the filter table and returns a map from filter name to
// filter id.
//
// An empty filterNames yields an empty map without querying the database.
// An error is returned when the query fails, when a row cannot be read, or
// when one or more names have no active filter row (the error lists them).
func validateFiltersExist(dbConn *sql.DB, filterNames map[string]bool) (map[string]string, error) {
	filterIDMap := make(map[string]string, len(filterNames))

	if len(filterNames) == 0 {
		return filterIDMap, nil
	}

	// database/sql takes the bind values as []any.
	args := make([]any, 0, len(filterNames))
	for name := range filterNames {
		args = append(args, name)
	}

	query := `SELECT id, name FROM filter WHERE name IN (` + db.Placeholders(len(args)) + `) AND active = true`
	rows, err := dbConn.Query(query, args...)
	if err != nil {
		return nil, fmt.Errorf("failed to query filters: %w", err)
	}
	defer rows.Close()

	for rows.Next() {
		var id, name string
		// A row that cannot be scanned is a database problem, not a missing
		// filter; report it as such instead of dropping the row.
		if err := rows.Scan(&id, &name); err != nil {
			return nil, fmt.Errorf("failed to scan filter row: %w", err)
		}
		filterIDMap[name] = id
	}
	// Next returns false both at the end of the rows and on error; without
	// this check a truncated read would surface as "filters not found".
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("failed to read filters: %w", err)
	}

	var missing []string
	for name := range filterNames {
		if _, exists := filterIDMap[name]; !exists {
			missing = append(missing, name)
		}
	}
	if len(missing) > 0 {
		return nil, fmt.Errorf("filters not found in database: [%s]", strings.Join(missing, ", "))
	}

	return filterIDMap, nil
}

// loadSpeciesCalltypeIDs resolves the species and calltypes seen in the
// .data files to database IDs, going through the mapping file.
//
// It returns:
//   - a map from database species label to species id (active species only);
//   - a map from database species label to database calltype label to
//     call_type id (active calltypes only);
//   - an error when a query fails or a row cannot be read.
//
// Species that have no mapping entry are skipped. A calltype with no matching
// row is left out of the map rather than treated as an error, so the caller
// can report it against the specific label that needs it.
func loadSpeciesCalltypeIDs(
	dbConn *sql.DB,
	mapping utils.MappingFile,
	uniqueSpecies map[string]bool,
	uniqueCalltypes map[string]map[string]bool,
) (map[string]string, map[string]map[string]string, error) {
	speciesIDMap := make(map[string]string)
	calltypeIDMap := make(map[string]map[string]string) // (dbSpecies, dbCalltype) -> calltype_id

	// Translate the .data species names to the distinct database labels.
	dbSpeciesSet := make(map[string]bool)
	for dataSpecies := range uniqueSpecies {
		if dbSpecies, ok := mapping.GetDBSpecies(dataSpecies); ok {
			dbSpeciesSet[dbSpecies] = true
		}
	}

	if len(dbSpeciesSet) > 0 {
		args := make([]any, 0, len(dbSpeciesSet))
		for s := range dbSpeciesSet {
			args = append(args, s)
		}

		query := `SELECT id, label FROM species WHERE label IN (` + db.Placeholders(len(args)) + `) AND active = true`
		rows, err := dbConn.Query(query, args...)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to query species: %w", err)
		}

		for rows.Next() {
			var id, label string
			if err := rows.Scan(&id, &label); err != nil {
				rows.Close()
				return nil, nil, fmt.Errorf("failed to scan species row: %w", err)
			}
			speciesIDMap[label] = id
		}
		// Close the result set here instead of deferring, so it is not held
		// open while the calltype queries below run on the same handle.
		iterErr := rows.Err()
		rows.Close()
		if iterErr != nil {
			return nil, nil, fmt.Errorf("failed to read species: %w", iterErr)
		}
	}

	for dataSpecies, ctSet := range uniqueCalltypes {
		dbSpecies, ok := mapping.GetDBSpecies(dataSpecies)
		if !ok {
			continue
		}

		if calltypeIDMap[dbSpecies] == nil {
			calltypeIDMap[dbSpecies] = make(map[string]string)
		}

		for dataCalltype := range ctSet {
			dbCalltype := mapping.GetDBCalltype(dataSpecies, dataCalltype)

			var calltypeID string
			err := dbConn.QueryRow(`
SELECT ct.id
FROM call_type ct
JOIN species s ON ct.species_id = s.id
WHERE s.label = ? AND ct.label = ? AND ct.active = true
`, dbSpecies, dbCalltype).Scan(&calltypeID)

			switch {
			case err == nil:
				calltypeIDMap[dbSpecies][dbCalltype] = calltypeID
			case err == sql.ErrNoRows:
				// No such active calltype: leave it unresolved.
			default:
				// A genuine query failure must not masquerade as a missing
				// calltype later on.
				return nil, nil, fmt.Errorf("failed to query calltype %s/%s: %w", dbSpecies, dbCalltype, err)
			}
		}
	}

	return speciesIDMap, calltypeIDMap, nil
}

// validateAndMapFiles decides which scanned files may be imported.
//
// For each file it hashes the WAV file, looks that hash up among the active
// files of the cluster, confirms the file is linked to the dataset through
// file_dataset, and confirms the file has no active labels yet. Files that
// pass are returned keyed by file id, with WavHash, FileID and the database
// duration filled in; every other file produces one error entry.
func validateAndMapFiles(
	dbConn *sql.DB,
	scannedFiles []scannedDataFile,
	clusterID string,
	datasetID string,
) (map[string]scannedDataFile, []ImportSegmentError) {
	accepted := make(map[string]scannedDataFile)
	var rejected []ImportSegmentError

	for _, sf := range scannedFiles {
		wavName := filepath.Base(sf.WavPath)
		// reject records an error against the current WAV file.
		reject := func(stage utils.ImportStage, format string, a ...any) {
			rejected = append(rejected, ImportSegmentError{
				File:    wavName,
				Stage:   stage,
				Message: fmt.Sprintf(format, a...),
			})
		}

		hash, err := utils.ComputeXXH64(sf.WavPath)
		if err != nil {
			reject(utils.StageHash, "failed to compute hash: %v", err)
			continue
		}
		sf.WavHash = hash

		// Locate the file row by hash within the cluster.
		var (
			fileID   string
			duration float64
		)
		err = dbConn.QueryRow(`
SELECT id, duration FROM file WHERE xxh64_hash = ? AND cluster_id = ? AND active = true
`, hash, clusterID).Scan(&fileID, &duration)
		switch {
		case err == sql.ErrNoRows:
			reject(utils.StageValidation, "file hash not found in database for cluster (hash: %s)", hash)
			continue
		case err != nil:
			reject(utils.StageValidation, "failed to query file: %v", err)
			continue
		}

		// From here on the database's duration is authoritative.
		sf.FileID = fileID
		sf.Duration = duration

		// The file must be linked to the dataset via the file_dataset
		// junction table (composite FK).
		var linked bool
		err = dbConn.QueryRow(`
SELECT EXISTS(SELECT 1 FROM file_dataset WHERE file_id = ? AND dataset_id = ?)
`, fileID, datasetID).Scan(&linked)
		if err != nil {
			reject(utils.StageValidation, "failed to verify file-dataset link: %v", err)
			continue
		}
		if !linked {
			reject(utils.StageValidation, "file exists in cluster but is not linked to dataset %s", datasetID)
			continue
		}

		// Only fresh imports are allowed: the file must have no active labels.
		var existingLabels int
		err = dbConn.QueryRow(`
SELECT COUNT(*) FROM label l
JOIN segment s ON l.segment_id = s.id
WHERE s.file_id = ? AND l.active = true
`, fileID).Scan(&existingLabels)
		if err != nil {
			reject(utils.StageValidation, "failed to check existing labels: %v", err)
			continue
		}
		if existingLabels > 0 {
			reject(utils.StageValidation, "file already has %d label(s) - fresh imports only", existingLabels)
			continue
		}

		accepted[fileID] = sf
	}

	return accepted, rejected
}

// dataFileUpdate holds the values to write back into one .data file after
// import; writeIDsToDataFiles consumes it.
type dataFileUpdate struct {
// DataPath is the .data file to re-parse and rewrite.
DataPath string
// WavHash is stored in the file's metadata under the "skraak_hash" key.
WavHash string
// LabelIDs gives, for each imported label, the ID to store on that label
// under the "skraak_label_id" key.
LabelIDs map[int]map[int]string // segmentIndex -> labelIndex -> labelID
}

// importLabelResult holds the result of importing a single label.
// When hasError is true only err is populated; otherwise labelImport and
// labelID describe the inserted label.
type importLabelResult struct {
labelImport LabelImport // summary of the inserted label for the import output
labelID string // ID generated for the inserted label row
subtypesImported int // 1 when a label_subtype row was inserted for the label's calltype, otherwise 0
err ImportSegmentError // failure details; meaningful only when hasError is true
hasError bool // true when the label could not be imported
}

// importSingleLabel inserts one label row and, when present, its comment
// (as label_metadata JSON) and its calltype (as a label_subtype row).
//
// The label's species is translated through mapping and resolved via
// speciesIDMap; its filter is resolved via filterIDMap. Any failure is
// returned as a result with hasError set. On success the result carries the
// generated label ID, a LabelImport summary, and subtypesImported = 1 when a
// calltype row was written. segIdx and labelIdx are currently unused.
//
// NOTE(review): if the label row is inserted but the metadata or calltype
// insert then fails, this function reports an error without removing the
// label row; whether that row survives depends on what the caller does with
// the transaction. Confirm this is intended.
func importSingleLabel(
	ctx context.Context,
	tx *db.LoggedTx,
	label *utils.Label,
	segmentID string,
	segIdx, labelIdx int,
	sf scannedDataFile,
	mapping utils.MappingFile,
	filterIDMap map[string]string,
	speciesIDMap map[string]string,
	calltypeIDMap map[string]map[string]string,
) importLabelResult {
	// fail builds an import-stage error result for the current .data file.
	fail := func(format string, a ...any) importLabelResult {
		return importLabelResult{
			err: ImportSegmentError{
				File:    filepath.Base(sf.DataPath),
				Stage:   utils.StageImport,
				Message: fmt.Sprintf(format, a...),
			},
			hasError: true,
		}
	}

	dbSpecies, ok := mapping.GetDBSpecies(label.Species)
	if !ok {
		return fail("species not found in mapping: %s", label.Species)
	}

	speciesID, ok := speciesIDMap[dbSpecies]
	if !ok {
		return fail("species ID not found: %s", dbSpecies)
	}

	filterID, ok := filterIDMap[label.Filter]
	if !ok {
		return fail("filter ID not found: %s", label.Filter)
	}

	labelID, err := utils.GenerateLongID()
	if err != nil {
		return fail("failed to generate label ID: %v", err)
	}

	_, err = tx.ExecContext(ctx, `
INSERT INTO label (id, segment_id, species_id, filter_id, certainty, created_at, last_modified, active)
VALUES (?, ?, ?, ?, ?, now(), now(), true)
`, labelID, segmentID, speciesID, filterID, label.Certainty)
	if err != nil {
		return fail("failed to insert label: %v", err)
	}

	// Store the comment, if any, as a small JSON document. The comment is
	// free text, so it must be fully JSON-escaped: escaping only the double
	// quote would corrupt comments containing backslashes or line breaks.
	if label.Comment != "" {
		metadataJSON := fmt.Sprintf(`{"comment": "%s"}`, escapeJSONString(label.Comment))
		if _, err := tx.ExecContext(ctx, `
INSERT INTO label_metadata (label_id, json, created_at, last_modified, active)
VALUES (?, ?, now(), now(), true)
`, labelID, metadataJSON); err != nil {
			return fail("failed to insert label_metadata: %v", err)
		}
	}

	labelImport := LabelImport{
		LabelID:   labelID,
		Species:   dbSpecies,
		Filter:    label.Filter,
		Certainty: label.Certainty,
		Comment:   label.Comment,
	}

	if label.CallType == "" {
		return importLabelResult{labelImport: labelImport, labelID: labelID}
	}

	// The label names a calltype: record it as a label_subtype row.
	if ctErr := importCalltype(ctx, tx, labelID, label, dbSpecies, filterID, mapping, calltypeIDMap, sf); ctErr != nil {
		return importLabelResult{err: *ctErr, hasError: true}
	}
	labelImport.CallType = mapping.GetDBCalltype(label.Species, label.CallType)
	return importLabelResult{labelImport: labelImport, labelID: labelID, subtypesImported: 1}
}

// escapeJSONString escapes s so it can be placed between double quotes in a
// JSON document: quotes, backslashes and control characters are escaped, and
// invalid UTF-8 bytes are replaced with U+FFFD.
func escapeJSONString(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	for _, r := range s {
		switch r {
		case '"':
			b.WriteString(`\"`)
		case '\\':
			b.WriteString(`\\`)
		case '\n':
			b.WriteString(`\n`)
		case '\r':
			b.WriteString(`\r`)
		case '\t':
			b.WriteString(`\t`)
		default:
			if r < 0x20 {
				// Remaining control characters have no short escape.
				fmt.Fprintf(&b, `\u%04x`, r)
			} else {
				b.WriteRune(r)
			}
		}
	}
	return b.String()
}

// importCalltype inserts the label_subtype row that records a label's
// calltype. The calltype is translated through mapping and resolved to an ID
// via calltypeIDMap (keyed by database species, then database calltype).
// It returns nil on success, or an import-stage error describing why the row
// could not be written.
func importCalltype(
	ctx context.Context,
	tx *db.LoggedTx,
	labelID string,
	label *utils.Label,
	dbSpecies string,
	filterID string,
	mapping utils.MappingFile,
	calltypeIDMap map[string]map[string]string,
	sf scannedDataFile,
) *ImportSegmentError {
	// fail builds an import-stage error for the current .data file.
	fail := func(format string, a ...any) *ImportSegmentError {
		return &ImportSegmentError{
			File:    filepath.Base(sf.DataPath),
			Stage:   utils.StageImport,
			Message: fmt.Sprintf(format, a...),
		}
	}

	dbCalltype := mapping.GetDBCalltype(label.Species, label.CallType)

	// Indexing a nil inner map yields "", so an unknown species and an
	// unknown calltype are both caught by the single check below.
	calltypeID := calltypeIDMap[dbSpecies][dbCalltype]
	if calltypeID == "" {
		return fail("calltype ID not found: %s/%s", dbSpecies, dbCalltype)
	}

	subtypeID, genErr := utils.GenerateLongID()
	if genErr != nil {
		return fail("failed to generate label_subtype ID: %v", genErr)
	}

	if _, execErr := tx.ExecContext(ctx, `
INSERT INTO label_subtype (id, label_id, calltype_id, filter_id, certainty, created_at, last_modified, active)
VALUES (?, ?, ?, ?, ?, now(), now(), true)
`, subtypeID, labelID, calltypeID, filterID, label.Certainty); execErr != nil {
		return fail("failed to insert label_subtype: %v", execErr)
	}
	return nil
}

// importSegmentsIntoDB performs the transactional import (phase D).
//
// All validated files in fileIDMap are processed inside one logged
// transaction. Per-segment and per-label failures are recorded as errors and
// do not abort the run. A segment that was inserted but ended up with no
// labels is deleted again. If the transaction cannot be started or committed,
// only the errors are returned.
//
// Returns, in order: the imported segments, the number of imported labels,
// the number of imported subtypes, the per-file updates to write back to the
// .data files, and the collected errors. progressHandler, when non-nil, is
// called once per file before that file is processed. scannedFiles is
// currently unused.
func importSegmentsIntoDB(
	ctx context.Context,
	database *sql.DB,
	fileIDMap map[string]scannedDataFile,
	scannedFiles []scannedDataFile,
	mapping utils.MappingFile,
	filterIDMap map[string]string,
	speciesIDMap map[string]string,
	calltypeIDMap map[string]map[string]string,
	datasetID string,
	progressHandler func(processed, total int, message string),
) ([]SegmentImport, int, int, []dataFileUpdate, []ImportSegmentError) {
	var importedSegments []SegmentImport
	var errs []ImportSegmentError
	importedLabels := 0
	importedSubtypes := 0
	var fileUpdates []dataFileUpdate

	tx, err := db.BeginLoggedTx(ctx, database, "import_segments")
	if err != nil {
		errs = append(errs, ImportSegmentError{
			Stage:   utils.StageImport,
			Message: fmt.Sprintf("failed to begin transaction: %v", err),
		})
		return nil, 0, 0, nil, errs
	}
	defer tx.Rollback()

	totalFiles := len(fileIDMap)
	processedFiles := 0

	for _, sf := range fileIDMap {
		if sf.FileID == "" {
			continue
		}

		processedFiles++
		if progressHandler != nil {
			progressHandler(processedFiles, totalFiles, filepath.Base(sf.DataPath))
		}

		fileUpdate := dataFileUpdate{
			DataPath: sf.DataPath,
			WavHash:  sf.WavHash,
			LabelIDs: make(map[int]map[int]string),
		}

		for segIdx, seg := range sf.Segments {
			segImp, labelIDs, subtypes, segErrs := importSegment(ctx, tx, seg, segIdx, sf, datasetID, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
			errs = append(errs, segErrs...)
			importedSubtypes += subtypes

			if len(segImp.Labels) > 0 {
				importedSegments = append(importedSegments, segImp)
				importedLabels += len(labelIDs)
				fileUpdate.LabelIDs[segIdx] = labelIDs
				continue
			}

			// No label succeeded. When importSegment rejected the segment
			// before inserting it, the returned SegmentID is empty and there
			// is no row to clean up, so skip the DELETE.
			if segImp.SegmentID == "" {
				continue
			}

			// Remove the orphaned segment row.
			if _, err := tx.ExecContext(ctx, `DELETE FROM segment WHERE id = ?`, segImp.SegmentID); err != nil {
				errs = append(errs, ImportSegmentError{
					File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
					Message: fmt.Sprintf("failed to delete orphaned segment: %v", err),
				})
			}
		}

		fileUpdates = append(fileUpdates, fileUpdate)
	}

	if err := tx.Commit(); err != nil {
		errs = append(errs, ImportSegmentError{
			Stage:   utils.StageImport,
			Message: fmt.Sprintf("failed to commit transaction: %v", err),
		})
		return nil, 0, 0, nil, errs
	}

	return importedSegments, importedLabels, importedSubtypes, fileUpdates, errs
}

// importSegment inserts one segment row and then each of its labels.
//
// The segment is rejected, before anything is written, when its start time is
// not before its end time or when its end time exceeds the file duration. In
// those cases, and when the ID cannot be generated or the insert fails, a
// zero SegmentImport is returned with a single error.
//
// Otherwise it returns the segment summary (with the labels that succeeded),
// a map from label index to generated label ID, the number of subtypes
// written, and the errors from labels that failed.
func importSegment(
	ctx context.Context,
	tx *db.LoggedTx,
	seg *utils.Segment,
	segIdx int,
	sf scannedDataFile,
	datasetID string,
	mapping utils.MappingFile,
	filterIDMap map[string]string,
	speciesIDMap map[string]string,
	calltypeIDMap map[string]map[string]string,
) (SegmentImport, map[int]string, int, []ImportSegmentError) {
	// abort produces the "nothing imported" result carrying one error.
	abort := func(format string, a ...any) (SegmentImport, map[int]string, int, []ImportSegmentError) {
		return SegmentImport{}, nil, 0, []ImportSegmentError{{
			File:    filepath.Base(sf.DataPath),
			Stage:   utils.StageImport,
			Message: fmt.Sprintf(format, a...),
		}}
	}

	switch {
	case seg.StartTime >= seg.EndTime:
		return abort("invalid segment bounds: start=%.2f >= end=%.2f", seg.StartTime, seg.EndTime)
	case seg.EndTime > sf.Duration:
		return abort("segment end time (%.2f) exceeds file duration (%.2f)", seg.EndTime, sf.Duration)
	}

	segmentID, genErr := utils.GenerateLongID()
	if genErr != nil {
		return abort("failed to generate segment ID: %v", genErr)
	}

	if _, execErr := tx.ExecContext(ctx, `
INSERT INTO segment (id, file_id, dataset_id, start_time, end_time, freq_low, freq_high, created_at, last_modified, active)
VALUES (?, ?, ?, ?, ?, ?, ?, now(), now(), true)
`, segmentID, sf.FileID, datasetID, seg.StartTime, seg.EndTime, seg.FreqLow, seg.FreqHigh); execErr != nil {
		return abort("failed to insert segment: %v", execErr)
	}

	summary := SegmentImport{
		SegmentID: segmentID,
		FileName:  filepath.Base(sf.WavPath),
		StartTime: seg.StartTime,
		EndTime:   seg.EndTime,
		FreqLow:   seg.FreqLow,
		FreqHigh:  seg.FreqHigh,
		Labels:    make([]LabelImport, 0),
	}

	var (
		labelErrs    []ImportSegmentError
		subtypeCount int
	)
	idsByLabel := make(map[int]string)

	// Labels are imported independently: a failed label is recorded and the
	// remaining labels are still attempted.
	for labelIdx, label := range seg.Labels {
		outcome := importSingleLabel(ctx, tx, label, segmentID, segIdx, labelIdx, sf, mapping, filterIDMap, speciesIDMap, calltypeIDMap)
		if outcome.hasError {
			labelErrs = append(labelErrs, outcome.err)
			continue
		}
		idsByLabel[labelIdx] = outcome.labelID
		summary.Labels = append(summary.Labels, outcome.labelImport)
		subtypeCount += outcome.subtypesImported
	}

	return summary, idsByLabel, subtypeCount, labelErrs
}

// countTotalSegments returns the number of segments across all validated
// files in fileIDMap.
func countTotalSegments(fileIDMap map[string]scannedDataFile) int {
	var total int
	for id := range fileIDMap {
		total += len(fileIDMap[id].Segments)
	}
	return total
}

// writeIDsToDataFiles records the import results in the .data files.
//
// Each file is re-parsed, "skraak_hash" is set in its metadata, and
// "skraak_label_id" is set on every label that has an ID in the update.
// Segment or label indices that fall outside the re-parsed file are ignored.
// A file that cannot be re-parsed or written produces one error and the
// remaining files are still processed.
func writeIDsToDataFiles(fileUpdates []dataFileUpdate) []ImportSegmentError {
	var problems []ImportSegmentError

	// report records an import-stage error against a .data file.
	report := func(dataPath, format string, cause error) {
		problems = append(problems, ImportSegmentError{
			File:    filepath.Base(dataPath),
			Stage:   utils.StageImport,
			Message: fmt.Sprintf(format, cause),
		})
	}

	for _, update := range fileUpdates {
		df, err := utils.ParseDataFile(update.DataPath)
		if err != nil {
			report(update.DataPath, "failed to re-parse .data file for writing: %v", err)
			continue
		}

		if df.Meta.Extra == nil {
			df.Meta.Extra = make(map[string]any)
		}
		df.Meta.Extra["skraak_hash"] = update.WavHash

		for segIdx, idsByLabel := range update.LabelIDs {
			if segIdx < len(df.Segments) {
				labels := df.Segments[segIdx].Labels
				for labelIdx, labelID := range idsByLabel {
					if labelIdx < len(labels) {
						target := labels[labelIdx]
						if target.Extra == nil {
							target.Extra = make(map[string]any)
						}
						target.Extra["skraak_label_id"] = labelID
					}
				}
			}
		}

		if err := df.Write(update.DataPath); err != nil {
			report(update.DataPath, "failed to write updated .data file: %v", err)
		}
	}

	return problems
}
file addition: import_files.go (----------)

[0.1]

package imp

import (
"context"
"database/sql"
"fmt"
"os"
"time"

"skraak/db"
"skraak/utils"
)

// ImportAudioFilesInput defines the input parameters for the import_audio_files tool
type ImportAudioFilesInput struct {
// DBPath is the database path; it is passed through db.ResolveDBPath before use.
DBPath string `json:"db_path"`
// FolderPath is the folder to import from; it must be an existing directory.
FolderPath string `json:"folder_path"`
// DatasetID, LocationID and ClusterID identify the hierarchy the files are
// imported into. Each must pass utils.ValidateShortID, and the three must be
// related to each other in the database.
DatasetID string `json:"dataset_id"`
LocationID string `json:"location_id"`
ClusterID string `json:"cluster_id"`
// Recursive is forwarded to utils.ImportCluster; when nil it defaults to true.
Recursive *bool `json:"recursive,omitempty"` // *bool because default is true; plain bool would make "not provided" indistinguishable from "false"
}

// ImportAudioFilesOutput defines the output structure for the import_audio_files tool
type ImportAudioFilesOutput struct {
// Summary holds the counts and timing for the run.
Summary ImportSummary `json:"summary"`
// FileIDs is currently always an empty list; ImportAudioFiles does not track
// the IDs of the files it imports.
FileIDs []string `json:"file_ids"`
// Errors are the per-file errors reported by utils.ImportCluster.
Errors []utils.FileImportError `json:"errors,omitempty"`
}

// ImportSummary provides summary statistics for the import operation.
// Every field except ProcessingTime is copied from the result of
// utils.ImportCluster.
type ImportSummary struct {
TotalFiles int `json:"total_files"`
ImportedFiles int `json:"imported_files"`
SkippedFiles int `json:"skipped_files"` // Duplicates
FailedFiles int `json:"failed_files"`
AudioMothFiles int `json:"audiomoth_files"`
TotalDuration float64 `json:"total_duration_seconds"`
// ProcessingTime is the elapsed wall-clock time of ImportAudioFiles,
// formatted with time.Duration.String.
ProcessingTime string `json:"processing_time"`
}

// ImportAudioFiles batch imports the WAV files in input.FolderPath into the
// given cluster.
//
// It validates the folder and the dataset/location/cluster hierarchy, opens
// the database for writing, makes sure the cluster path is set, and then
// delegates the import itself to utils.ImportCluster inside one logged
// transaction. The transaction is rolled back when the cluster import fails.
// On success the cluster result is mapped into the returned output; on any
// failure the zero output and a wrapped error are returned.
func ImportAudioFiles(
	ctx context.Context,
	input ImportAudioFilesInput,
) (ImportAudioFilesOutput, error) {
	began := time.Now()
	var output ImportAudioFilesOutput

	// Recursive defaults to true when the caller did not provide it.
	recursive := input.Recursive == nil || *input.Recursive

	if err := validateImportInput(input, db.ResolveDBPath(input.DBPath, "")); err != nil {
		return output, fmt.Errorf("validation failed: %w", err)
	}

	database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
	if err != nil {
		return output, fmt.Errorf("failed to open database: %w", err)
	}
	defer database.Close()

	if err := utils.EnsureClusterPath(database, input.ClusterID, input.FolderPath); err != nil {
		return output, fmt.Errorf("failed to set cluster path: %w", err)
	}

	tx, err := db.BeginLoggedTx(ctx, database, "import_audio_files")
	if err != nil {
		return output, fmt.Errorf("failed to begin transaction: %w", err)
	}

	// utils.ImportCluster does the actual work.
	request := utils.ClusterImportInput{
		FolderPath: input.FolderPath,
		DatasetID:  input.DatasetID,
		LocationID: input.LocationID,
		ClusterID:  input.ClusterID,
		Recursive:  recursive,
	}
	imported, err := utils.ImportCluster(database, tx.UnderlyingTx(), request)
	if err != nil {
		tx.Rollback()
		return output, fmt.Errorf("cluster import failed: %w", err)
	}

	if err := tx.Commit(); err != nil {
		return output, fmt.Errorf("transaction commit failed: %w", err)
	}

	output.Summary = ImportSummary{
		TotalFiles:     imported.TotalFiles,
		ImportedFiles:  imported.ImportedFiles,
		SkippedFiles:   imported.SkippedFiles,
		FailedFiles:    imported.FailedFiles,
		AudioMothFiles: imported.AudioMothFiles,
		TotalDuration:  imported.TotalDuration,
		ProcessingTime: time.Since(began).String(),
	}
	output.FileIDs = []string{} // File IDs not tracked currently
	output.Errors = imported.Errors

	return output, nil
}

// validateImportInput checks that input.FolderPath is an accessible
// directory and then validates the dataset/location/cluster IDs and their
// database relationships via validateHierarchyIDs, using the database at
// dbPath.
func validateImportInput(input ImportAudioFilesInput, dbPath string) error {
	switch info, err := os.Stat(input.FolderPath); {
	case err != nil:
		return fmt.Errorf("folder not accessible: %w", err)
	case !info.IsDir():
		return fmt.Errorf("path is not a directory: %s", input.FolderPath)
	}

	return validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, dbPath)
}

// validateHierarchyIDs validates the dataset, location and cluster IDs in two
// steps. The ID formats are checked first, so malformed input fails before
// any database access. The relationships are then checked on a read
// connection to dbPath: the dataset must be acceptable for import, the
// location must belong to the dataset, and the cluster must belong to the
// location. The first failure is returned.
func validateHierarchyIDs(datasetID, locationID, clusterID, dbPath string) error {
	idChecks := [...]struct{ value, field string }{
		{datasetID, "dataset_id"},
		{locationID, "location_id"},
		{clusterID, "cluster_id"},
	}
	for _, check := range idChecks {
		if err := utils.ValidateShortID(check.value, check.field); err != nil {
			return err
		}
	}

	// checkRelationships runs the database-backed checks, top of the
	// hierarchy first.
	checkRelationships := func(conn *sql.DB) error {
		if err := db.ValidateDatasetTypeForImport(conn, datasetID); err != nil {
			return err
		}
		if err := db.ValidateLocationBelongsToDataset(conn, locationID, datasetID); err != nil {
			return err
		}
		return db.ClusterBelongsToLocation(conn, clusterID, locationID)
	}

	return db.WithReadDB(dbPath, checkRelationships)
}
file addition: import_file.go (----------)

[0.1]

package imp

import (
"context"
"database/sql"
"fmt"
"os"
"path/filepath"
"strings"
"time"

"skraak/db"
"skraak/utils"
)

// ImportFileInput defines the input parameters for the import_file tool
type ImportFileInput struct {
// DBPath is the database path; it is passed through db.ResolveDBPath before use.
DBPath string `json:"db_path"`
// FilePath is the WAV file to import; it must be a non-empty regular file
// with a .wav extension (case-insensitive).
FilePath string `json:"file_path"`
// DatasetID, LocationID and ClusterID identify the hierarchy the file is
// imported into; they are checked by validateHierarchyIDs.
DatasetID string `json:"dataset_id"`
LocationID string `json:"location_id"`
ClusterID string `json:"cluster_id"`
}

// ImportFileOutput defines the output structure for the import_file tool
type ImportFileOutput struct {
// FileID is the ID of the file row; for a duplicate it is the ID of the
// existing row with the same hash.
FileID string `json:"file_id"`
// FileName is first set to the base name of the input path and then to the
// name reported by utils.ProcessSingleFile.
FileName string `json:"file_name"`
// Hash, Duration, SampleRate, TimestampLocal and IsAudioMoth are copied from
// the result of utils.ProcessSingleFile.
Hash string `json:"hash"`
Duration float64 `json:"duration_seconds"`
SampleRate int `json:"sample_rate"`
TimestampLocal time.Time `json:"timestamp_local"`
IsAudioMoth bool `json:"is_audiomoth"`
// IsDuplicate reports whether a file with the same hash already existed.
IsDuplicate bool `json:"is_duplicate"`
// ProcessingTime is the elapsed wall-clock time, formatted with
// time.Duration.String. It is set on success and on processing/insertion
// failures, but not on earlier validation failures.
ProcessingTime string `json:"processing_time"`
// Error holds the message of a file-processing or database-insertion
// failure; it is nil otherwise.
Error *string `json:"error,omitempty"`
}

// ImportFile imports a single WAV file into the database with duplicate
// detection.
//
// Steps: validate the file path, validate the dataset/location/cluster
// hierarchy, open the database, load the location data, extract the file's
// metadata with utils.ProcessSingleFile, make sure the cluster path is set,
// and insert the file. When metadata extraction or insertion fails, the
// returned output also carries the error message and the elapsed time; other
// failures return the output as populated so far together with the error.
func ImportFile(
	ctx context.Context,
	input ImportFileInput,
) (ImportFileOutput, error) {
	began := time.Now()
	var output ImportFileOutput

	// recordFailure stores the failure message and elapsed time on the output.
	recordFailure := func(cause error) {
		msg := cause.Error()
		output.Error = &msg
		output.ProcessingTime = time.Since(began).String()
	}

	// Phase 1: the path must point at a usable WAV file.
	if _, err := validateFilePath(input.FilePath); err != nil {
		return output, fmt.Errorf("file validation failed: %w", err)
	}
	output.FileName = filepath.Base(input.FilePath)

	// Phase 2: the IDs must be well-formed and related in the database.
	if err := validateHierarchyIDs(input.DatasetID, input.LocationID, input.ClusterID, db.ResolveDBPath(input.DBPath, "")); err != nil {
		return output, fmt.Errorf("hierarchy validation failed: %w", err)
	}

	// Phase 3: one writeable connection serves every remaining step.
	database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
	if err != nil {
		return output, fmt.Errorf("database connection failed: %w", err)
	}
	defer database.Close()

	// Phase 4: latitude, longitude and timezone feed the file processing.
	location, err := utils.GetLocationData(database, input.LocationID)
	if err != nil {
		return output, fmt.Errorf("failed to get location data: %w", err)
	}

	// Phase 5: extract the file's metadata.
	processed, err := utils.ProcessSingleFile(input.FilePath, location.Latitude, location.Longitude, location.TimezoneID, true)
	if err != nil {
		recordFailure(err)
		return output, fmt.Errorf("file processing failed: %w", err)
	}

	output.FileName = processed.FileName
	output.Hash = processed.Hash
	output.Duration = processed.Duration
	output.SampleRate = processed.SampleRate
	output.TimestampLocal = processed.TimestampLocal
	output.IsAudioMoth = processed.IsAudioMoth

	// Phase 6: the cluster path is taken from the file's directory.
	if err := utils.EnsureClusterPath(database, input.ClusterID, filepath.Dir(input.FilePath)); err != nil {
		return output, fmt.Errorf("failed to set cluster path: %w", err)
	}

	// Phase 7: insert the file, or detect that it is already present.
	fileID, duplicate, err := insertFileIntoDB(ctx, database, processed, input.DatasetID, input.ClusterID, input.LocationID)
	if err != nil {
		recordFailure(err)
		return output, fmt.Errorf("database insertion failed: %w", err)
	}

	output.FileID = fileID
	output.IsDuplicate = duplicate
	output.ProcessingTime = time.Since(began).String()

	return output, nil
}

// validateFilePath checks that filePath names an existing, regular,
// non-empty file whose extension is .wav (compared case-insensitively).
// The checks run in that order: existence, regular file, extension, size.
// It returns the file's os.FileInfo on success, or nil and an error
// describing the first check that failed.
func validateFilePath(filePath string) (os.FileInfo, error) {
	info, err := os.Stat(filePath)
	switch {
	case err == nil:
		// The file is accessible; continue with the remaining checks.
	case os.IsNotExist(err):
		return nil, fmt.Errorf("file does not exist: %s", filePath)
	default:
		return nil, fmt.Errorf("cannot access file: %w", err)
	}

	if !info.Mode().IsRegular() {
		return nil, fmt.Errorf("path is not a regular file: %s", filePath)
	}

	if ext := strings.ToLower(filepath.Ext(filePath)); ext != ".wav" {
		return nil, fmt.Errorf("file must be a WAV file (got extension: %s)", ext)
	}

	if info.Size() == 0 {
		return nil, fmt.Errorf("file is empty: %s", filePath)
	}

	return info, nil
}

// insertFileIntoDB inserts a single processed file into the database inside
// one logged transaction.
//
// It first checks whether a file with the same hash already exists. If so it
// returns that file's ID with isDuplicate=true and writes nothing; the
// deferred rollback then discards the untouched transaction. Otherwise it
// inserts the file row, the file_dataset junction row and, for AudioMoth
// recordings that carry metadata, a moth_metadata row, and commits.
//
// Returns (fileID, isDuplicate, error). On error the returned ID is empty.
func insertFileIntoDB(
ctx context.Context,
database *sql.DB,
result *utils.FileProcessingResult,
datasetID, clusterID, locationID string,
) (string, bool, error) {
// Begin logged transaction
tx, err := db.BeginLoggedTx(ctx, database, "import_audio_file")
if err != nil {
return "", false, fmt.Errorf("failed to begin transaction: %w", err)
}
// Safety net for every early return below; its result is ignored.
defer tx.Rollback() // Rollback if not committed

// Check for duplicate hash
existingID, isDup, err := utils.CheckDuplicateHash(tx, result.Hash)
if err != nil {
return "", false, err
}
if isDup {
// Duplicate: report the existing file's ID and insert nothing.
return existingID, true, nil
}

// Generate file ID
fileID, err := utils.GenerateLongID()
if err != nil {
return "", false, fmt.Errorf("ID generation failed: %w", err)
}

// Insert file record
_, err = tx.ExecContext(ctx, `
INSERT INTO file (
id, file_name, xxh64_hash, location_id, timestamp_local,
cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,
moon_phase, created_at, last_modified, active
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)
`,
fileID, result.FileName, result.Hash, locationID,
result.TimestampLocal, clusterID, result.Duration, result.SampleRate,
result.AstroData.SolarNight, result.AstroData.CivilNight, result.AstroData.MoonPhase,
)
if err != nil {
return "", false, fmt.Errorf("file insert failed: %w", err)
}

// Insert file_dataset junction
_, err = tx.ExecContext(ctx, `
INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)
VALUES (?, ?, now(), now())
`, fileID, datasetID)
if err != nil {
return "", false, fmt.Errorf("file_dataset insert failed: %w", err)
}

// If AudioMoth, insert moth_metadata
if result.IsAudioMoth && result.MothData != nil {
// NOTE(review): the last four values are passed by address, presumably so
// the driver can treat them as optional — confirm against the field types.
_, err = tx.ExecContext(ctx, `
INSERT INTO moth_metadata (
file_id, timestamp, recorder_id, gain, battery_v, temp_c,
created_at, last_modified, active
) VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)
`,
fileID,
result.MothData.Timestamp,
&result.MothData.RecorderID,
&result.MothData.Gain,
&result.MothData.BatteryV,
&result.MothData.TempC,
)
if err != nil {
return "", false, fmt.Errorf("moth_metadata insert failed: %w", err)
}
}

// Commit transaction
if err = tx.Commit(); err != nil {
return "", false, fmt.Errorf("transaction commit failed: %w", err)
}

return fileID, false, nil
}
file addition: bulk_file_import.go (----------)

[0.1]

package imp

import (
"context"
"database/sql"
"encoding/csv"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"time"

"skraak/db"
"skraak/utils"
)

// BulkFileImportInput defines the input parameters for the bulk_file_import tool.
type BulkFileImportInput struct {
// DBPath is the database location; it is passed through db.ResolveDBPath before opening.
DBPath string `json:"db_path"`
// DatasetID is the dataset that clusters and files are imported into.
DatasetID string `json:"dataset_id"`
// CSVPath is the CSV file listing the locations to import (parsed by bulkReadCSV).
CSVPath string `json:"csv_path"`
// LogFilePath is the progress log; it is created if missing and appended to.
LogFilePath string `json:"log_file_path"`
}

// BulkFileImportOutput defines the output structure for the bulk_file_import tool.
// BulkFileImport fills it in progressively, so on failure it holds whatever
// was gathered before the failing phase.
type BulkFileImportOutput struct {
// TotalLocations is the number of location rows loaded from the CSV.
TotalLocations int `json:"total_locations"`
// ClustersCreated counts clusters newly inserted during the run.
ClustersCreated int `json:"clusters_created"`
// ClustersExisting counts rows whose cluster was already present.
ClustersExisting int `json:"clusters_existing"`
// TotalFilesScanned, FilesImported, FilesDuplicate and FilesError are the
// per-cluster import statistics summed over all locations.
TotalFilesScanned int `json:"total_files_scanned"`
FilesImported int `json:"files_imported"`
FilesDuplicate int `json:"files_duplicate"`
FilesError int `json:"files_error"`
// ProcessingTime is the elapsed wall-clock time, formatted with Duration.String.
ProcessingTime string `json:"processing_time"`
// Errors holds error messages for the run; omitted from JSON when empty.
Errors []string `json:"errors,omitempty"`
}

// bulkLocationData holds CSV row data for a location.
// Fields are listed in CSV column order (columns 0 through 5).
type bulkLocationData struct {
// LocationName is used in log and error messages.
LocationName string
// LocationID is the location the row's cluster belongs to.
LocationID string
// DirectoryPath is the folder scanned for files to import.
DirectoryPath string
// DateRange doubles as the cluster name when looking up or creating a cluster.
DateRange string
// SampleRate is stored on a newly created cluster.
SampleRate int
// FileCount is parsed from the CSV; the code visible here does not read it afterwards.
FileCount int
}

// bulkImportStats tracks import statistics for a single cluster.
// bulkImportAllFiles also uses it to accumulate totals across clusters.
type bulkImportStats struct {
// TotalFiles is the number of files the cluster import reported in total.
TotalFiles int
// ImportedFiles is the number of files newly imported.
ImportedFiles int
// DuplicateFiles is the number of files the cluster import reported as skipped.
DuplicateFiles int
// ErrorFiles is the number of files the cluster import reported as failed.
ErrorFiles int
}

// progressLogger mirrors progress messages to a log file and to an in-memory buffer.
type progressLogger struct {
	file   *os.File         // destination log file; synced after every message
	buffer *strings.Builder // in-memory copy of every line written
}

// Log formats the message, prefixes it with the current time in the form
// "[YYYY-MM-DD HH:MM:SS] ", terminates it with a newline, and appends the line
// to both the log file and the buffer.
//
// A failed file write or sync is reported on stderr and otherwise ignored, so
// logging problems never abort an import.
func (l *progressLogger) Log(format string, args ...any) {
	entry := fmt.Sprintf(
		"[%s] %s\n",
		time.Now().Format("2006-01-02 15:04:05"),
		fmt.Sprintf(format, args...),
	)

	_, writeErr := l.file.WriteString(entry)
	if writeErr != nil {
		fmt.Fprintf(os.Stderr, "Warning: log write failed: %v\n", writeErr)
	}
	// Flush to disk straight away so the log is current if the process dies.
	if syncErr := l.file.Sync(); syncErr != nil {
		fmt.Fprintf(os.Stderr, "Warning: log sync failed: %v\n", syncErr)
	}

	// The buffer copy is kept for potential error reporting.
	l.buffer.WriteString(entry)
}

// failOutput sets error details and processing time on the output before returning.
// errs replaces any messages already stored in o.Errors; the processing time
// is the time elapsed since startTime.
func (o *BulkFileImportOutput) failOutput(errs []string, startTime time.Time) {
o.Errors = errs
o.ProcessingTime = time.Since(startTime).String()
}

// BulkFileImport imports WAV files across multiple locations using a CSV specification.
//
// The run proceeds in phases, each written to the log file at input.LogFilePath:
//  1. validate the input parameters (bulkValidateInput);
//  2. read the location rows from the CSV (bulkReadCSV);
//  3. check that every location_id belongs to the dataset;
//  4. look up or create one cluster per CSV row;
//  5. import the files of every location into its cluster.
//
// The returned output is filled in progressively. When a phase fails, the
// output carries the statistics gathered so far, the error message in Errors
// and the elapsed ProcessingTime, and the error is returned as well.
func BulkFileImport(
	ctx context.Context,
	input BulkFileImportInput,
) (BulkFileImportOutput, error) {
	startTime := time.Now()
	var output BulkFileImportOutput

	// Open log file
	logFile, err := os.OpenFile(input.LogFilePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
	if err != nil {
		return output, fmt.Errorf("failed to open log file: %w", err)
	}
	// A close failure on the log file is not actionable here, so it is ignored.
	defer func() { _ = logFile.Close() }()

	logger := &progressLogger{
		file:   logFile,
		buffer: &strings.Builder{},
	}

	logger.Log("Starting bulk file import for dataset %s", input.DatasetID)

	// Validate input
	logger.Log("Validating input parameters...")
	if err := bulkValidateInput(input); err != nil {
		logger.Log("ERROR: Validation failed: %v", err)
		output.failOutput([]string{fmt.Sprintf("validation failed: %v", err)}, startTime)
		return output, fmt.Errorf("validation failed: %w", err)
	}
	logger.Log("Validation complete")

	// Read CSV
	logger.Log("Reading CSV file: %s", input.CSVPath)
	locations, err := bulkReadCSV(input.CSVPath)
	if err != nil {
		logger.Log("ERROR: Failed to read CSV: %v", err)
		output.failOutput([]string{fmt.Sprintf("failed to read CSV: %v", err)}, startTime)
		return output, fmt.Errorf("failed to read CSV: %w", err)
	}
	logger.Log("Loaded %d locations from CSV", len(locations))
	output.TotalLocations = len(locations)

	// Validate all location_ids belong to the dataset
	logger.Log("Validating location_ids belong to dataset...")
	if err := bulkValidateLocations(logger, locations, input.DatasetID, db.ResolveDBPath(input.DBPath, "")); err != nil {
		output.failOutput([]string{err.Error()}, startTime)
		return output, err
	}
	logger.Log("Location validation complete")

	// Create/validate clusters
	logger.Log("=== Phase 1: Creating/Validating Clusters ===")
	database, err := db.OpenWriteableDB(db.ResolveDBPath(input.DBPath, ""))
	if err != nil {
		logger.Log("ERROR: Failed to open database: %v", err)
		output.failOutput([]string{fmt.Sprintf("failed to open database: %v", err)}, startTime)
		return output, fmt.Errorf("failed to open database: %w", err)
	}
	defer database.Close()

	clusterIDMap, created, existing, err := bulkCreateClusters(ctx, database, logger, locations, input.DatasetID)
	if err != nil {
		// Record the failure in the output like every other phase does;
		// bulkCreateClusters has already written the details to the log.
		output.failOutput([]string{err.Error()}, startTime)
		return output, err
	}
	output.ClustersCreated = created
	output.ClustersExisting = existing

	// Import files
	logger.Log("=== Phase 2: Importing Files ===")
	fileStats, errs := bulkImportAllFiles(database, logger, locations, clusterIDMap, input.DatasetID)
	output.TotalFilesScanned = fileStats.TotalFiles
	output.FilesImported = fileStats.ImportedFiles
	output.FilesDuplicate = fileStats.DuplicateFiles
	output.FilesError = fileStats.ErrorFiles
	output.Errors = append(output.Errors, errs...)

	if len(errs) > 0 {
		output.ProcessingTime = time.Since(startTime).String()
		return output, fmt.Errorf("failed to import files: %s", errs[0])
	}

	logger.Log("=== Import Complete ===")
	logger.Log("Total files scanned: %d", fileStats.TotalFiles)
	logger.Log("Files imported: %d", fileStats.ImportedFiles)
	logger.Log("Duplicates skipped: %d", fileStats.DuplicateFiles)
	logger.Log("Errors: %d", fileStats.ErrorFiles)
	logger.Log("Processing time: %s", time.Since(startTime).Round(time.Second))

	output.ProcessingTime = time.Since(startTime).String()

	return output, nil
}

// bulkValidateInput checks the input parameters before any import work starts.
//
// Cheap checks come first: the dataset ID format, then that the CSV file and
// the log file's directory can be stat'ed. Only then is the database opened
// read-only to confirm the dataset is acceptable for import.
func bulkValidateInput(input BulkFileImportInput) error {
	if idErr := utils.ValidateShortID(input.DatasetID, "dataset_id"); idErr != nil {
		return idErr
	}

	if _, statErr := os.Stat(input.CSVPath); statErr != nil {
		return fmt.Errorf("CSV file not accessible: %w", statErr)
	}

	// Only the directory's existence is checked; a write is not attempted.
	if _, statErr := os.Stat(filepath.Dir(input.LogFilePath)); statErr != nil {
		return fmt.Errorf("log file directory not accessible: %w", statErr)
	}

	conn, openErr := db.OpenReadOnlyDB(db.ResolveDBPath(input.DBPath, ""))
	if openErr != nil {
		return fmt.Errorf("failed to open database: %w", openErr)
	}
	defer conn.Close()

	if typeErr := db.ValidateDatasetTypeForImport(conn, input.DatasetID); typeErr != nil {
		return typeErr
	}
	return nil
}

// bulkValidateLocationsBelongToDataset validates that all unique location_ids
// in the CSV belong to the dataset.
//
// Each distinct location_id is checked once, in the order it first appears in
// locations, so the returned messages come out in a stable, CSV-ordered
// sequence. The result is nil when every location belongs to the dataset.
func bulkValidateLocationsBelongToDataset(dbConn *sql.DB, locations []bulkLocationData, datasetID string) []string {
	var problems []string

	checked := make(map[string]struct{}, len(locations))
	for _, loc := range locations {
		if _, done := checked[loc.LocationID]; done {
			continue
		}
		checked[loc.LocationID] = struct{}{}

		if err := db.ValidateLocationBelongsToDataset(dbConn, loc.LocationID, datasetID); err != nil {
			problems = append(problems, err.Error())
		}
	}

	return problems
}

// bulkValidateLocations opens the database at dbPath read-only and checks that
// every location_id in locations belongs to datasetID.
//
// Each offending location is logged individually. The returned error only
// reports how many locations failed; it is nil when all of them are valid.
func bulkValidateLocations(logger *progressLogger, locations []bulkLocationData, datasetID string, dbPath string) error {
	conn, openErr := db.OpenReadOnlyDB(dbPath)
	if openErr != nil {
		logger.Log("ERROR: Failed to open database: %v", openErr)
		return fmt.Errorf("failed to open database: %w", openErr)
	}

	problems := bulkValidateLocationsBelongToDataset(conn, locations, datasetID)
	// Close the read-only handle as soon as the queries are done.
	conn.Close()

	if len(problems) == 0 {
		return nil
	}

	for _, problem := range problems {
		logger.Log("ERROR: %s", problem)
	}
	return fmt.Errorf("location validation failed: %d location(s) do not belong to dataset %s", len(problems), datasetID)
}

// bulkCreateClusters makes sure an active cluster exists for every location row.
//
// For each row it looks for an active cluster with the row's location_id and
// a name equal to the row's date range. A missing cluster is created with
// bulkCreateCluster; an existing one is reused. The lookup runs under ctx, so
// cancelling the context stops the loop.
//
// Returns a map from "locationID|dateRange" to cluster ID, the number of
// clusters created, the number that already existed, and the first error
// encountered. On error the map is nil and both counts are zero.
func bulkCreateClusters(ctx context.Context, database *sql.DB, logger *progressLogger, locations []bulkLocationData, datasetID string) (map[string]string, int, int, error) {
	clusterIDMap := make(map[string]string)
	created := 0
	existing := 0

	for i, loc := range locations {
		logger.Log("[%d/%d] Processing location: %s", i+1, len(locations), loc.LocationName)

		var clusterID string
		err := database.QueryRowContext(ctx, `
SELECT id FROM cluster
WHERE location_id = ? AND name = ? AND active = true
`, loc.LocationID, loc.DateRange).Scan(&clusterID)

		switch {
		case err == sql.ErrNoRows:
			// No active cluster yet for this location/date range: create one.
			clusterID, err = bulkCreateCluster(ctx, database, datasetID, loc.LocationID, loc.DateRange, loc.SampleRate)
			if err != nil {
				logger.Log("ERROR: Failed to create cluster for location %s: %v", loc.LocationName, err)
				return nil, 0, 0, fmt.Errorf("failed to create cluster: %w", err)
			}
			logger.Log(" Created cluster: %s", clusterID)
			created++
		case err != nil:
			logger.Log("ERROR: Failed to check cluster for location %s: %v", loc.LocationName, err)
			return nil, 0, 0, fmt.Errorf("failed to check cluster: %w", err)
		default:
			logger.Log(" Using existing cluster: %s", clusterID)
			existing++
		}

		// bulkImportAllFiles rebuilds the same composite key to find the cluster.
		compositeKey := loc.LocationID + "|" + loc.DateRange
		clusterIDMap[compositeKey] = clusterID
	}

	return clusterIDMap, created, existing, nil
}

// bulkImportAllFiles imports the files of every location into the cluster
// recorded for it in clusterIDMap (keyed by "locationID|dateRange").
//
// Locations without a map entry, or whose directory does not exist, are
// skipped. The first cluster import that fails stops the run: the totals
// gathered so far are returned with a one-element slice holding the message.
// On success the returned slice is nil.
func bulkImportAllFiles(database *sql.DB, logger *progressLogger, locations []bulkLocationData, clusterIDMap map[string]string, datasetID string) (bulkImportStats, []string) {
	var totals bulkImportStats
	locationCount := len(locations)

	for idx := range locations {
		loc := locations[idx]

		clusterID, found := clusterIDMap[loc.LocationID+"|"+loc.DateRange]
		if !found {
			continue
		}

		logger.Log("[%d/%d] Importing files for: %s", idx+1, locationCount, loc.LocationName)
		logger.Log(" Directory: %s", loc.DirectoryPath)

		if _, statErr := os.Stat(loc.DirectoryPath); os.IsNotExist(statErr) {
			logger.Log(" WARNING: Directory not found, skipping")
			continue
		}

		clusterStats, importErr := bulkImportFilesForCluster(database, logger, loc.DirectoryPath, datasetID, loc.LocationID, clusterID)
		if importErr != nil {
			failure := fmt.Sprintf("Failed to import files for location %s: %v", loc.LocationName, importErr)
			logger.Log("ERROR: %s", failure)
			return totals, []string{failure}
		}

		logger.Log(" Scanned: %d files", clusterStats.TotalFiles)
		logger.Log(" Imported: %d, Duplicates: %d", clusterStats.ImportedFiles, clusterStats.DuplicateFiles)
		if clusterStats.ErrorFiles > 0 {
			logger.Log(" Errors: %d files", clusterStats.ErrorFiles)
		}

		totals.TotalFiles += clusterStats.TotalFiles
		totals.ImportedFiles += clusterStats.ImportedFiles
		totals.DuplicateFiles += clusterStats.DuplicateFiles
		totals.ErrorFiles += clusterStats.ErrorFiles
	}

	return totals, nil
}
// bulkReadCSV loads the location rows from the CSV file at path.
//
// The first record is treated as a header and skipped. Every other record
// must have at least six columns, in this order: location_name, location_id,
// directory_path, date_range, sample_rate, file_count. Surrounding whitespace
// is trimmed from every field before it is validated.
//
// It returns an error for an unreadable or empty file and for the first row
// that fails validation. Row numbers in error messages are 1-based and count
// the header row.
func bulkReadCSV(path string) ([]bulkLocationData, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// The file is only read, so a close failure carries no information.
	defer func() { _ = file.Close() }()

	records, err := csv.NewReader(file).ReadAll()
	if err != nil {
		return nil, err
	}

	if len(records) == 0 {
		return nil, fmt.Errorf("CSV file is empty")
	}

	var locations []bulkLocationData
	for i, record := range records[1:] { // records[0] is the header
		loc, err := bulkParseCSVRow(record, i+2)
		if err != nil {
			return nil, err
		}
		locations = append(locations, loc)
	}

	return locations, nil
}

// bulkParseCSVRow validates one CSV record and converts it to bulkLocationData.
// rowNum is the 1-based row number used in error messages.
func bulkParseCSVRow(record []string, rowNum int) (bulkLocationData, error) {
	var loc bulkLocationData

	if len(record) < 6 {
		return loc, fmt.Errorf("CSV row %d has insufficient columns (expected 6, got %d)", rowNum, len(record))
	}

	// Required string fields must be non-empty once trimmed.
	loc.LocationName = strings.TrimSpace(record[0])
	if loc.LocationName == "" {
		return loc, fmt.Errorf("empty location_name in row %d", rowNum)
	}
	loc.DirectoryPath = strings.TrimSpace(record[2])
	if loc.DirectoryPath == "" {
		return loc, fmt.Errorf("empty directory_path in row %d", rowNum)
	}
	loc.DateRange = strings.TrimSpace(record[3])
	if loc.DateRange == "" {
		return loc, fmt.Errorf("empty date_range in row %d", rowNum)
	}

	loc.LocationID = strings.TrimSpace(record[1])
	if err := utils.ValidateShortID(loc.LocationID, "location_id"); err != nil {
		return loc, fmt.Errorf("invalid location_id in row %d: %w", rowNum, err)
	}

	sampleRate, err := strconv.Atoi(strings.TrimSpace(record[4]))
	if err != nil {
		return loc, fmt.Errorf("invalid sample_rate in row %d: %w", rowNum, err)
	}
	if err := utils.ValidateSampleRate(sampleRate); err != nil {
		return loc, fmt.Errorf("invalid sample_rate in row %d: %w", rowNum, err)
	}
	loc.SampleRate = sampleRate

	fileCount, err := strconv.Atoi(strings.TrimSpace(record[5]))
	if err != nil {
		return loc, fmt.Errorf("invalid file_count in row %d: %w", rowNum, err)
	}
	loc.FileCount = fileCount

	return loc, nil
}

// bulkCreateCluster inserts a new active cluster for the given location and
// returns its generated ID.
//
// name becomes the cluster name. The cluster path is derived from the
// location's name with spaces and "/" replaced by "_". The insert runs in its
// own logged transaction, and both the location lookup and the insert honour
// ctx. Underlying errors are wrapped so callers can inspect them.
func bulkCreateCluster(ctx context.Context, database *sql.DB, datasetID, locationID, name string, sampleRate int) (string, error) {
	// Generate the new cluster's ID
	clusterID, err := utils.GenerateShortID()
	if err != nil {
		return "", fmt.Errorf("failed to generate cluster ID: %w", err)
	}
	now := time.Now().UTC()

	// Get location name for the path
	var locationName string
	err = database.QueryRowContext(ctx, "SELECT name FROM location WHERE id = ?", locationID).Scan(&locationName)
	if err != nil {
		return "", fmt.Errorf("failed to get location name: %w", err)
	}

	// Normalize path: replace spaces and path separators
	path := strings.ReplaceAll(locationName, " ", "_")
	path = strings.ReplaceAll(path, "/", "_")

	tx, err := db.BeginLoggedTx(ctx, database, "bulk_file_import")
	if err != nil {
		return "", fmt.Errorf("failed to begin transaction: %w", err)
	}
	// Safety net for the error returns below.
	defer tx.Rollback()

	_, err = tx.ExecContext(ctx, `
INSERT INTO cluster (id, dataset_id, location_id, name, path, sample_rate, active, created_at, last_modified)
VALUES (?, ?, ?, ?, ?, ?, true, ?, ?)
`, clusterID, datasetID, locationID, name, path, sampleRate, now, now)
	if err != nil {
		return "", fmt.Errorf("failed to insert cluster: %w", err)
	}

	if err = tx.Commit(); err != nil {
		return "", fmt.Errorf("failed to commit cluster creation: %w", err)
	}

	return clusterID, nil
}

// bulkImportFilesForCluster imports the files under folderPath (recursively)
// into one cluster, inside a single logged transaction.
//
// A missing directory is not an error: a warning is logged and zeroed stats
// are returned. If the cluster import fails, the transaction is rolled back
// and the error returned unchanged. Otherwise the transaction is committed,
// up to five per-file errors are logged, and the counts are returned.
func bulkImportFilesForCluster(database *sql.DB, logger *progressLogger, folderPath, datasetID, locationID, clusterID string) (*bulkImportStats, error) {
	if _, statErr := os.Stat(folderPath); os.IsNotExist(statErr) {
		logger.Log(" WARNING: Directory not found, skipping")
		return &bulkImportStats{}, nil
	}

	logger.Log(" Importing cluster %s", clusterID)

	// NOTE(review): this runs under a fresh background context because the
	// function receives none from its caller.
	tx, err := db.BeginLoggedTx(context.Background(), database, "import_audio_files")
	if err != nil {
		return nil, fmt.Errorf("failed to begin transaction: %w", err)
	}

	importInput := utils.ClusterImportInput{
		FolderPath: folderPath,
		DatasetID:  datasetID,
		LocationID: locationID,
		ClusterID:  clusterID,
		Recursive:  true,
	}
	clusterOutput, err := utils.ImportCluster(database, tx.UnderlyingTx(), importInput)
	if err != nil {
		tx.Rollback()
		return nil, err
	}

	if commitErr := tx.Commit(); commitErr != nil {
		return nil, fmt.Errorf("transaction commit failed: %w", commitErr)
	}

	stats := &bulkImportStats{
		TotalFiles:     clusterOutput.TotalFiles,
		ImportedFiles:  clusterOutput.ImportedFiles,
		DuplicateFiles: clusterOutput.SkippedFiles,
		ErrorFiles:     clusterOutput.FailedFiles,
	}

	// Only the first few per-file errors go to the log, to keep it readable.
	const maxLoggedErrors = 5
	for i, fileErr := range clusterOutput.Errors {
		if i >= maxLoggedErrors {
			break
		}
		logger.Log(" ERROR: %s: %s", fileErr.FileName, fileErr.Error)
	}

	logger.Log(" Complete: %d imported, %d duplicates, %d errors", stats.ImportedFiles, stats.DuplicateFiles, stats.ErrorFiles)

	return stats, nil
}
file addition: calls (d--r------)

[6.248737]
file addition: parallel_aggregate.go (----------)

[0.67281]

package calls

import (
"fmt"
"os"
"path/filepath"
"sort"
"sync/atomic"
)

// parallelResult is the common interface for birda/raven worker results.
// aggregateResults consumes one value per processed file through it.
type parallelResult interface {
// filePath is the processed file; it is used for progress reporting and optional deletion.
filePath() string
// getCalls returns the calls found in the file.
getCalls() []ClusteredCall
// wasWritten reports whether a data file was written; only written results are eligible for deletion.
wasWritten() bool
// wasSkipped reports whether the file was skipped.
wasSkipped() bool
// getError returns the worker's error, or nil.
getError() error
}

// aggregateStats holds the collected results from a parallel fan-out/fan-in.
type aggregateStats struct {
// calls is every call from every result, in arrival order.
calls []ClusteredCall
// speciesCount maps a call's EbirdCode to the number of calls seen with it.
speciesCount map[string]int
// dataFilesWritten counts results that reported wasWritten.
dataFilesWritten int
// dataFilesSkipped counts results that reported wasSkipped.
dataFilesSkipped int
// filesProcessed counts every result received.
filesProcessed int
// filesDeleted counts source files successfully removed.
filesDeleted int
// firstErr is the first worker or deletion error encountered, if any.
firstErr error
}

// aggregateResults drains the results channel and folds every result into a
// single aggregateStats value.
//
// For each result it remembers the first error seen, counts written and
// skipped data files, collects the calls and a per-species tally, optionally
// deletes the source file (see maybeDeleteFile), and then reports progress.
// processed is only touched when progressHandler is non-nil; total is passed
// to the handler unchanged. The function returns once results is closed.
func aggregateResults(
	results <-chan parallelResult,
	total int,
	processed *atomic.Int32,
	deleteFiles bool,
	progressHandler func(int, int, string),
) aggregateStats {
	stats := aggregateStats{speciesCount: make(map[string]int)}

	for res := range results {
		resErr := res.getError()
		if stats.firstErr == nil && resErr != nil {
			stats.firstErr = resErr
		}

		if res.wasWritten() {
			stats.dataFilesWritten++
		}
		if res.wasSkipped() {
			stats.dataFilesSkipped++
		}

		found := res.getCalls()
		stats.calls = append(stats.calls, found...)
		for i := range found {
			stats.speciesCount[found[i].EbirdCode]++
		}

		stats.filesProcessed++
		stats.maybeDeleteFile(deleteFiles, res)

		if progressHandler == nil {
			continue
		}
		progressHandler(int(processed.Add(1)), total, filepath.Base(res.filePath()))
	}

	return stats
}

// maybeDeleteFile removes the result's source file when deletion was requested
// and the result reports that its data file was written.
//
// A successful removal bumps filesDeleted. A failed removal is stored as
// firstErr unless an earlier error is already recorded.
func (s *aggregateStats) maybeDeleteFile(deleteFiles bool, result parallelResult) {
	if !(deleteFiles && result.wasWritten()) {
		return
	}

	removeErr := os.Remove(result.filePath())
	if removeErr == nil {
		s.filesDeleted++
		return
	}

	if s.firstErr == nil {
		s.firstErr = fmt.Errorf("failed to delete %s: %w", result.filePath(), removeErr)
	}
}

// sortCallsByFileAndTime orders calls in place: first by File, and within the
// same file by ascending StartTime.
func sortCallsByFileAndTime(calls []ClusteredCall) {
	sort.Slice(calls, func(a, b int) bool {
		left, right := &calls[a], &calls[b]
		if left.File == right.File {
			return left.StartTime < right.StartTime
		}
		return left.File < right.File
	})
}
file addition: isnight.go (----------)

[0.67281]

package calls

import (
"fmt"
"strings"
"time"

"github.com/sixdouglas/suncalc"

"skraak/utils"
)

// IsNightInput defines the input parameters for the isnight tool.
type IsNightInput struct {
// FilePath is the WAV file whose header and timestamp are examined.
FilePath string `json:"file_path"`
// Lat and Lng are the coordinates used for the sun and moon calculations.
Lat float64 `json:"lat"`
Lng float64 `json:"lng"`
// Timezone is handed to the timestamp resolver; it may be left empty.
Timezone string `json:"timezone,omitempty"`
}

// IsNightOutput defines the output structure for the isnight tool.
// All time fields are RFC 3339 strings in UTC.
type IsNightOutput struct {
// FilePath echoes the input file path.
FilePath string `json:"file_path"`
// TimestampUTC is the resolved recording timestamp.
TimestampUTC string `json:"timestamp_utc"`
// SolarNight, CivilNight and MoonPhase come from the astronomical
// calculation for the recording.
SolarNight bool `json:"solar_night"`
CivilNight bool `json:"civil_night"`
// DiurnalActive is true when the recording midpoint lies between dawn and
// sunset (inclusive); it stays false when either event is unavailable.
DiurnalActive bool `json:"diurnal_active"`
MoonPhase float64 `json:"moon_phase"`
// DurationSec is the duration read from the WAV header.
DurationSec float64 `json:"duration_seconds"`
// TimestampSrc is "audiomoth_comment", "filename" or "file_mod_time".
TimestampSrc string `json:"timestamp_source"`
// MidpointUTC is the midpoint of the recording.
MidpointUTC string `json:"midpoint_utc"`
// The sun event times are left empty (and omitted from JSON) when the
// event is missing or zero for the given day and location.
SunriseUTC string `json:"sunrise_utc,omitempty"`
SunsetUTC string `json:"sunset_utc,omitempty"`
DawnUTC string `json:"dawn_utc,omitempty"`
DuskUTC string `json:"dusk_utc,omitempty"`
}

// IsNight reports whether a WAV recording was made at night, based on the
// recording's timestamp and the given coordinates.
//
// Timestamp resolution order:
//  1. AudioMoth comment (timezone embedded)
//  2. Filename timestamp + timezone offset (requires --timezone)
//  3. File modification time (system local time)
//
// The night flags and moon phase come from the astronomical calculation for
// the recording; the sun event times are computed for the recording midpoint
// and included for information. If the timestamp cannot be resolved, the
// returned output still carries the duration read from the WAV header.
func IsNight(input IsNightInput) (IsNightOutput, error) {
	var output IsNightOutput

	// Read duration and embedded metadata from the WAV header.
	header, err := utils.ParseWAVHeader(input.FilePath)
	if err != nil {
		return output, fmt.Errorf("WAV header parsing failed: %w", err)
	}
	output.DurationSec = header.Duration

	// Resolve the recording start, falling back to the file modification time.
	resolved, err := utils.ResolveTimestamp(header, input.FilePath, input.Timezone, true, nil)
	if err != nil {
		return output, fmt.Errorf("cannot determine recording timestamp: %w", err)
	}

	// Label where the timestamp came from.
	var source string
	switch {
	case resolved.IsAudioMoth:
		source = "audiomoth_comment"
	case utils.HasTimestampFilename(input.FilePath):
		source = "filename"
	default:
		source = "file_mod_time"
	}

	startUTC := resolved.Timestamp.UTC()

	astro := utils.CalculateAstronomicalData(startUTC, header.Duration, input.Lat, input.Lng)

	// Sun event times are looked up for the recording midpoint.
	midpoint := utils.CalculateMidpointTime(startUTC, header.Duration)
	sunTimes := suncalc.GetTimes(midpoint, input.Lat, input.Lng)

	output.FilePath = input.FilePath
	output.TimestampUTC = startUTC.Format(time.RFC3339)
	output.MidpointUTC = midpoint.Format(time.RFC3339)
	output.TimestampSrc = source
	output.SolarNight = astro.SolarNight
	output.CivilNight = astro.CivilNight
	output.MoonPhase = astro.MoonPhase

	populateSunTimes(&output, sunTimes, midpoint)

	return output, nil
}

// sunTimeUTC formats the named suncalc event as an RFC 3339 UTC string.
// It returns "" when the event is missing from sunTimes or its time is zero.
func sunTimeUTC(sunTimes map[suncalc.DayTimeName]suncalc.DayTime, name suncalc.DayTimeName) string {
	event, found := sunTimes[name]
	if !found || event.Value.IsZero() {
		return ""
	}
	return event.Value.UTC().Format(time.RFC3339)
}

// populateSunTimes copies the sunrise, sunset, dawn and dusk times from the
// suncalc results into output and derives the diurnal flag.
//
// DiurnalActive is set only when both dawn and sunset are present and
// non-zero: it is true when midpoint falls within [dawn, sunset].
func populateSunTimes(output *IsNightOutput, sunTimes map[suncalc.DayTimeName]suncalc.DayTime, midpoint time.Time) {
	dawn, hasDawn := sunTimes[suncalc.Dawn]
	sunset, hasSunset := sunTimes[suncalc.Sunset]
	if hasDawn && !dawn.Value.IsZero() && hasSunset && !sunset.Value.IsZero() {
		afterDawn := !midpoint.Before(dawn.Value)
		beforeSunset := !midpoint.After(sunset.Value)
		output.DiurnalActive = afterDawn && beforeSunset
	}

	output.SunriseUTC = sunTimeUTC(sunTimes, suncalc.Sunrise)
	output.SunsetUTC = sunTimeUTC(sunTimes, suncalc.Sunset)
	output.DawnUTC = sunTimeUTC(sunTimes, suncalc.Dawn)
	output.DuskUTC = sunTimeUTC(sunTimes, suncalc.Dusk)
}

// String renders the result as a multi-line, human-readable report.
// The sun event lines are only included for events that have a value.
func (o IsNightOutput) String() string {
	var out strings.Builder

	fmt.Fprintf(&out, "File: %s\n", o.FilePath)
	fmt.Fprintf(&out, "Timestamp (UTC): %s\n", o.TimestampUTC)
	fmt.Fprintf(&out, "Midpoint (UTC): %s\n", o.MidpointUTC)
	fmt.Fprintf(&out, "Duration: %.1f seconds\n", o.DurationSec)
	fmt.Fprintf(&out, "Source: %s\n", o.TimestampSrc)
	fmt.Fprintf(&out, "Solar night: %v\n", o.SolarNight)
	fmt.Fprintf(&out, "Civil night: %v\n", o.CivilNight)
	fmt.Fprintf(&out, "Moon phase: %.2f\n", o.MoonPhase)

	// Optional lines, in display order.
	sunEvents := [...]struct{ format, value string }{
		{"Sunrise (UTC): %s\n", o.SunriseUTC},
		{"Sunset (UTC): %s\n", o.SunsetUTC},
		{"Dawn (UTC): %s\n", o.DawnUTC},
		{"Dusk (UTC): %s\n", o.DuskUTC},
	}
	for _, ev := range sunEvents {
		if ev.value == "" {
			continue
		}
		fmt.Fprintf(&out, ev.format, ev.value)
	}

	return out.String()
}
file addition: calls_summarise.go (----------)

[0.67281]

package calls

import (
"sort"
"strings"

"skraak/utils"
)

// CallsSummariseInput defines the input for the calls-summarise tool.
type CallsSummariseInput struct {
// Folder is the directory searched for .data files.
Folder string `json:"folder"`
// Brief suppresses the per-segment list; only statistics are produced.
Brief bool `json:"brief"`
// Filter, when non-empty, keeps only labels whose filter name equals it exactly.
Filter string `json:"filter,omitempty"`
}

// CallsSummariseOutput defines the output for the calls-summarise tool.
type CallsSummariseOutput struct {
// Segments lists the included segments, sorted by file then start time;
// it stays empty in brief mode.
Segments []SegmentSummary `json:"segments"`
// Folder echoes the input folder.
Folder string `json:"folder"`
// DataFilesRead counts the .data files parsed successfully.
DataFilesRead int `json:"data_files_read"`
// DataFilesSkipped lists the paths of .data files that failed to parse.
DataFilesSkipped []string `json:"data_files_skipped"`
// TotalSegments is len(Segments), or in brief mode the sum of the
// per-filter Segments counts.
TotalSegments int `json:"total_segments"`
// Filters holds statistics keyed by filter name.
Filters map[string]FilterStats `json:"filters"`
ReviewStatus ReviewStatus `json:"review_status"`
// Operators and Reviewers are the sorted, de-duplicated names found in
// the files' metadata.
Operators []string `json:"operators"`
Reviewers []string `json:"reviewers"`
// Error is set only when the search for .data files fails.
Error *string `json:"error,omitempty"`
}

// SegmentSummary represents a single segment in the output.
type SegmentSummary struct {
// File is the audio file name: the .data file's base name without its ".data" suffix.
File string `json:"file"`
// StartTime and EndTime are copied from the parsed segment.
StartTime float64 `json:"start_time"`
EndTime float64 `json:"end_time"`
// Labels are the segment's labels that passed the filter.
Labels []LabelSummary `json:"labels"`
}

// LabelSummary represents a label in the output (omits empty fields).
// Filter, Certainty and Species are always emitted; CallType, Comment and
// Bookmark are dropped from the JSON when empty or false.
type LabelSummary struct {
Filter string `json:"filter"`
Certainty int `json:"certainty"`
Species string `json:"species"`
CallType string `json:"calltype,omitempty"`
Comment string `json:"comment,omitempty"`
Bookmark bool `json:"bookmark,omitempty"`
}

// FilterStats contains per-filter statistics.
type FilterStats struct {
// Segments is incremented once per label carrying this filter.
Segments int `json:"segments"`
// Species maps species -> number of labels.
Species map[string]int `json:"species"`
// Calltypes maps species -> calltype -> count; it is set to nil (and
// omitted from JSON) when no label of this filter has a call type.
Calltypes map[string]map[string]int `json:"calltypes,omitempty"` // species -> calltype -> count
}

// ReviewStatus contains review progress statistics.
// Every counted label falls into exactly one of Unreviewed, Confirmed or
// DontKnow; the remaining counters are independent of that split.
type ReviewStatus struct {
Unreviewed int `json:"unreviewed"` // certainty other than 0 or 100
Confirmed int `json:"confirmed"` // certainty = 100
DontKnow int `json:"dont_know"` // certainty = 0
WithCallType int `json:"with_calltype"` // labels with a non-empty call type
WithComments int `json:"with_comments"` // labels with a non-empty comment
Bookmarked int `json:"bookmarked"` // labels flagged as bookmarked
}

// CallsSummarise reads every .data file found under input.Folder and builds a
// summary of their segments and labels.
//
// If the file search fails, the error is returned and also recorded in
// output.Error. A folder without .data files yields an empty summary.
// Otherwise each file is parsed (unparseable ones are listed as skipped),
// statistics are accumulated, and the result is sorted. In brief mode the
// segment list is left empty and TotalSegments is the sum of the per-filter
// counts.
func CallsSummarise(input CallsSummariseInput) (CallsSummariseOutput, error) {
	var output CallsSummariseOutput

	dataFiles, findErr := utils.FindDataFiles(input.Folder)
	if findErr != nil {
		msg := findErr.Error()
		output.Error = &msg
		return output, findErr
	}

	// Start from empty, non-nil collections so the JSON shows [] and {} rather than null.
	output.Folder = input.Folder
	output.Segments = []SegmentSummary{}
	output.DataFilesSkipped = []string{}
	output.Operators = []string{}
	output.Reviewers = []string{}
	output.Filters = map[string]FilterStats{}

	if len(dataFiles) == 0 {
		return output, nil
	}

	// Sets of the unique operator and reviewer names seen across files.
	operators := map[string]bool{}
	reviewers := map[string]bool{}

	summariseFiles(dataFiles, input, &output, operators, reviewers)

	// Brief mode keeps no segment list, so the total comes from the filter stats.
	total := len(output.Segments)
	if input.Brief {
		total = 0
		for _, stats := range output.Filters {
			total += stats.Segments
		}
	}
	output.TotalSegments = total

	finaliseSummary(&output, operators, reviewers, input.Brief)

	return output, nil
}

// summariseFiles parses each .data file and folds its contents into output.
//
// Files that fail to parse are recorded in DataFilesSkipped. For the others,
// operator and reviewer names are collected and each segment's labels are
// filtered. When a filter is set, segments left without labels are ignored.
// Statistics are updated for every remaining segment; the segment itself is
// appended to output.Segments unless brief mode is on.
func summariseFiles(filePaths []string, input CallsSummariseInput, output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool) {
	wantSegments := !input.Brief
	filtering := input.Filter != ""

	for _, dataPath := range filePaths {
		parsed, parseErr := utils.ParseDataFile(dataPath)
		if parseErr != nil {
			output.DataFilesSkipped = append(output.DataFilesSkipped, dataPath)
			continue
		}

		output.DataFilesRead++
		trackMeta(parsed.Meta, operatorSet, reviewerSet)

		// The audio file name is only needed when segments are listed.
		audioName := ""
		if wantSegments {
			audioName = extractRelativePath(input.Folder, dataPath)
		}

		for _, seg := range parsed.Segments {
			kept := filterLabels(seg.Labels, input.Filter)
			if filtering && len(kept) == 0 {
				continue
			}

			updateStatsFromLabels(kept, output)

			if !wantSegments {
				continue
			}
			output.Segments = append(output.Segments, SegmentSummary{
				File:      audioName,
				StartTime: seg.StartTime,
				EndTime:   seg.EndTime,
				Labels:    buildLabelSummaries(kept),
			})
		}
	}
}

// trackMeta adds the file's operator and reviewer names, when present, to the
// given sets. A nil meta or an empty name contributes nothing.
func trackMeta(meta *utils.DataMeta, operatorSet, reviewerSet map[string]bool) {
	if meta == nil {
		return
	}
	if operator := meta.Operator; operator != "" {
		operatorSet[operator] = true
	}
	if reviewer := meta.Reviewer; reviewer != "" {
		reviewerSet[reviewer] = true
	}
}

// filterLabels selects the labels whose Filter equals filter exactly.
// An empty filter disables filtering and returns the input slice itself;
// when nothing matches, the result is nil.
func filterLabels(labels []*utils.Label, filter string) []*utils.Label {
	if filter == "" {
		return labels
	}

	var matching []*utils.Label
	for i := range labels {
		if labels[i].Filter != filter {
			continue
		}
		matching = append(matching, labels[i])
	}
	return matching
}

// buildLabelSummaries converts labels to label summaries, preserving order.
//
// The result is never nil: a segment without labels yields an empty slice, so
// the JSON output shows "labels": [] rather than null, matching the other
// collections in the summary. Empty CallType/Comment values and a false
// Bookmark are copied as-is; their omitempty tags keep them out of the JSON.
func buildLabelSummaries(labels []*utils.Label) []LabelSummary {
	summaries := make([]LabelSummary, 0, len(labels))
	for _, l := range labels {
		summaries = append(summaries, LabelSummary{
			Filter:    l.Filter,
			Certainty: l.Certainty,
			Species:   l.Species,
			CallType:  l.CallType,
			Comment:   l.Comment,
			Bookmark:  l.Bookmark,
		})
	}
	return summaries
}

// updateStatsFromLabels folds every label into the per-filter statistics and
// the review status counters of output.
func updateStatsFromLabels(labels []*utils.Label, output *CallsSummariseOutput) {
	for i := range labels {
		label := labels[i]
		updateFilterStats(label, output)
		updateReviewStatus(label, output)
	}
}

// updateFilterStats records one label in the statistics for its filter,
// creating that filter's entry on first use.
//
// It bumps the filter's label count and species tally and, when the label has
// a call type, the species -> calltype tally as well.
func updateFilterStats(l *utils.Label, output *CallsSummariseOutput) {
	stats, known := output.Filters[l.Filter]
	if !known {
		stats = FilterStats{
			Species:   map[string]int{},
			Calltypes: map[string]map[string]int{},
		}
	}

	stats.Segments++
	stats.Species[l.Species]++

	if l.CallType != "" {
		perSpecies := stats.Calltypes[l.Species]
		if perSpecies == nil {
			perSpecies = make(map[string]int)
			stats.Calltypes[l.Species] = perSpecies
		}
		perSpecies[l.CallType]++
	}

	// FilterStats is stored by value, so the updated copy must be written back.
	output.Filters[l.Filter] = stats
}

// updateReviewStatus counts one label towards the review progress figures.
//
// Certainty 100 counts as confirmed, 0 as "don't know" and anything else as
// unreviewed. Call type, comment and bookmark are tallied independently.
func updateReviewStatus(l *utils.Label, output *CallsSummariseOutput) {
	status := &output.ReviewStatus

	switch certainty := l.Certainty; certainty {
	case 0:
		status.DontKnow++
	case 100:
		status.Confirmed++
	default:
		status.Unreviewed++
	}

	if l.Bookmark {
		status.Bookmarked++
	}
	if l.Comment != "" {
		status.WithComments++
	}
	if l.CallType != "" {
		status.WithCallType++
	}
}

// finaliseSummary puts the summary into its final, deterministic shape.
//
// Empty calltype maps are replaced by nil so they are omitted from the JSON.
// The operator and reviewer sets are appended to the output slices, which are
// then sorted. Unless brief is set, segments are ordered by file and then by
// start time.
func finaliseSummary(output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool, brief bool) {
	for name, stats := range output.Filters {
		if len(stats.Calltypes) != 0 {
			continue
		}
		stats.Calltypes = nil
		output.Filters[name] = stats
	}

	for name := range operatorSet {
		output.Operators = append(output.Operators, name)
	}
	sort.Strings(output.Operators)

	for name := range reviewerSet {
		output.Reviewers = append(output.Reviewers, name)
	}
	sort.Strings(output.Reviewers)

	if brief {
		return
	}

	segs := output.Segments
	sort.Slice(segs, func(a, b int) bool {
		if segs[a].File == segs[b].File {
			return segs[a].StartTime < segs[b].StartTime
		}
		return segs[a].File < segs[b].File
	})
}

// extractRelativePath returns the audio filename for a .data file path:
// everything after the last "/" with a trailing ".data" removed.
// e.g., "/folder/tx51_LISTENING_20260221_203004.WAV.data" -> "tx51_LISTENING_20260221_203004.WAV"
// The case of the remaining extension is left untouched. The folder
// argument is not consulted.
func extractRelativePath(folder, dataPath string) string {
	// LastIndex yields -1 when there is no "/", so +1 starts at the beginning.
	base := dataPath[strings.LastIndex(dataPath, "/")+1:]
	return strings.TrimSuffix(base, ".data")
}
file addition: calls_show_images.go (----------)

[0.67281]

package calls

import (
"fmt"
"os"
"strings"

"skraak/utils"
)

// CallsShowImagesInput defines the input for the show-images tool
type CallsShowImagesInput struct {
// DataFilePath is the path of the .data file to read; the WAV path is
// derived from it by stripping the ".data" suffix.
DataFilePath string `json:"data_file_path"`
// Color is passed through to the spectrogram generator.
Color bool `json:"color"`
// ImageSize is the requested image size; 0 selects utils.SpectrogramDisplaySize.
ImageSize int `json:"image_size"`
// Sixel selects the Sixel graphics protocol (ignored when ITerm is set).
Sixel bool `json:"sixel"`
// ITerm selects the iTerm graphics protocol. With neither ITerm nor Sixel
// set, the Kitty protocol is used.
ITerm bool `json:"iterm"`
}

// CallsShowImagesOutput defines the output for the show-images tool
type CallsShowImagesOutput struct {
// SegmentsShown is the segment count reported by CallsShowImages on success.
SegmentsShown int `json:"segments_shown"`
// WavFile is the WAV path derived from the input .data path.
WavFile string `json:"wav_file"`
// Error holds the failure message when CallsShowImages returns an error.
Error string `json:"error,omitempty"`
}

// CallsShowImages reads a .data file and displays a spectrogram image for each segment.
//
// The WAV path is derived by stripping the ".data" suffix from
// input.DataFilePath; both files must exist. For every segment a spectrogram
// is generated, a one-line description is printed to stderr and the image is
// written to stdout using the selected terminal graphics protocol (iTerm if
// input.ITerm, else Sixel if input.Sixel, else Kitty).
//
// Segments whose spectrogram cannot be generated are skipped (a warning is
// printed to stderr when generation returned an error) and are not counted
// in SegmentsShown. On failure the returned output carries the message in
// Error and the same message is returned as the error.
func CallsShowImages(input CallsShowImagesInput) (CallsShowImagesOutput, error) {
	var output CallsShowImagesOutput

	// Validate file exists
	if _, err := os.Stat(input.DataFilePath); os.IsNotExist(err) {
		output.Error = fmt.Sprintf("File not found: %s", input.DataFilePath)
		return output, fmt.Errorf("%s", output.Error)
	}

	// Derive WAV file path (strip .data suffix)
	wavPath := strings.TrimSuffix(input.DataFilePath, ".data")
	output.WavFile = wavPath

	// Check WAV file exists
	if _, err := os.Stat(wavPath); os.IsNotExist(err) {
		output.Error = fmt.Sprintf("WAV file not found: %s", wavPath)
		return output, fmt.Errorf("%s", output.Error)
	}

	// Parse .data file (includes labels for future filtering)
	dataFile, err := utils.ParseDataFile(input.DataFilePath)
	if err != nil {
		output.Error = err.Error()
		return output, fmt.Errorf("%s", output.Error)
	}

	if len(dataFile.Segments) == 0 {
		output.Error = "No segments found in .data file"
		return output, fmt.Errorf("%s", output.Error)
	}

	// Resolve image size
	imgSize := input.ImageSize
	if imgSize == 0 {
		imgSize = utils.SpectrogramDisplaySize
	}

	// Select graphics protocol
	protocol := utils.ProtocolKitty
	if input.ITerm {
		protocol = utils.ProtocolITerm
	} else if input.Sixel {
		protocol = utils.ProtocolSixel
	}

	// Generate spectrogram for each segment and output
	shown := 0
	for i, seg := range dataFile.Segments {
		// Generate spectrogram image
		img, err := utils.GenerateSegmentSpectrogram(input.DataFilePath, seg.StartTime, seg.EndTime, input.Color, imgSize)
		if err != nil {
			// Best effort: report the failure and carry on with the next segment.
			fmt.Fprintf(os.Stderr, "Segment %d: skipped: %v\n", i+1, err)
			continue
		}
		if img == nil {
			continue
		}

		// Print segment info
		labelInfo := formatSegmentLabels(seg.Labels)
		fmt.Fprintf(os.Stderr, "Segment %d: %.1fs - %.1fs (%.1fs)%s\n",
			i+1, seg.StartTime, seg.EndTime, seg.EndTime-seg.StartTime, labelInfo)

		// Write to stdout via terminal graphics protocol
		if err := utils.WriteImage(img, os.Stdout, protocol); err != nil {
			output.Error = fmt.Sprintf("Failed to write image: %v", err)
			return output, fmt.Errorf("%s", output.Error)
		}
		fmt.Println() // Newline after image
		shown++
	}

	// Count only the segments that were actually rendered, not skipped ones.
	output.SegmentsShown = shown
	return output, nil
}

// formatSegmentLabels renders labels for the segment info line. Each label
// becomes "species", optionally followed by "/calltype" and " [filter]";
// labels are comma-separated and the result is prefixed with a space. An
// empty label list yields the empty string.
func formatSegmentLabels(labels []*utils.Label) string {
	if len(labels) == 0 {
		return ""
	}

	var sb strings.Builder
	sb.WriteString(" ")
	for i, label := range labels {
		if i > 0 {
			sb.WriteString(", ")
		}
		sb.WriteString(label.Species)
		if label.CallType != "" {
			sb.WriteString("/")
			sb.WriteString(label.CallType)
		}
		if label.Filter != "" {
			sb.WriteString(" [")
			sb.WriteString(label.Filter)
			sb.WriteString("]")
		}
	}
	return sb.String()
}
file addition: calls_push_certainty_test.go (----------)

[0.67281]

package calls

import (
"encoding/json"
"os"
"path/filepath"
"testing"

"skraak/utils"
)

func TestPushCertaintyPromotesMatchingLabels(t *testing.T) {
tempDir := t.TempDir()

// File with two Kiwi segments: certainty=90 and certainty=70
file1 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]], [10, 20, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`
file1Path := filepath.Join(tempDir, "file1.data")
if err := os.WriteFile(file1Path, []byte(file1), 0644); err != nil {
t.Fatal(err)
}

// File with one Tomtit at certainty=90 (must not be promoted when species=Kiwi)
file2 := `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
file2Path := filepath.Join(tempDir, "file2.data")
if err := os.WriteFile(file2Path, []byte(file2), 0644); err != nil {
t.Fatal(err)
}

result, err := PushCertainty(PushCertaintyConfig{
Folder: tempDir,
Species: "Kiwi",
Reviewer: "TestReviewer",
})
if err != nil {
t.Fatal(err)
}

if result.SegmentsUpdated != 1 {
t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)
}
if result.FilesUpdated != 1 {
t.Errorf("expected 1 file updated, got %d", result.FilesUpdated)
}

// Verify file1: certainty=90 Kiwi → 100, certainty=70 Kiwi → unchanged
df, err := utils.ParseDataFile(file1Path)
if err != nil {
t.Fatal(err)
}
if df.Segments[0].Labels[0].Certainty != 100 {
t.Errorf("expected certainty=100, got %d", df.Segments[0].Labels[0].Certainty)
}
if df.Segments[1].Labels[0].Certainty != 70 {
t.Errorf("expected certainty=70 unchanged, got %d", df.Segments[1].Labels[0].Certainty)
}
if df.Meta.Reviewer != "TestReviewer" {
t.Errorf("expected reviewer=TestReviewer, got %q", df.Meta.Reviewer)
}

// Verify Tomtit file was not modified
df2, err := utils.ParseDataFile(file2Path)
if err != nil {
t.Fatal(err)
}
if df2.Segments[0].Labels[0].Certainty != 90 {
t.Errorf("Tomtit certainty should be unchanged at 90, got %d", df2.Segments[0].Labels[0].Certainty)
}
}

// TestPushCertaintyFilterScope checks that a push restricted to one filter
// promotes only that filter's label when a segment carries labels from
// several filters.
func TestPushCertaintyFilterScope(t *testing.T) {
	tempDir := t.TempDir()

	// Segment has two labels from different filters, both Kiwi certainty=90
	data := []any{
		map[string]any{"Operator": "test"},
		[]any{0.0, 10.0, 100.0, 1000.0, []any{
			map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-a"},
			map[string]any{"species": "Kiwi", "certainty": 90, "filter": "model-b"},
		}},
	}
	raw, err := json.Marshal(data)
	if err != nil {
		t.Fatalf("marshal fixture: %v", err)
	}
	filePath := filepath.Join(tempDir, "file1.data")
	if err := os.WriteFile(filePath, raw, 0644); err != nil {
		t.Fatal(err)
	}

	// Push only model-a
	result, err := PushCertainty(PushCertaintyConfig{
		Folder:   tempDir,
		Filter:   "model-a",
		Species:  "Kiwi",
		Reviewer: "TestReviewer",
	})
	if err != nil {
		t.Fatal(err)
	}
	if result.SegmentsUpdated != 1 {
		t.Errorf("expected 1 segment updated, got %d", result.SegmentsUpdated)
	}

	// Verify only model-a label was promoted; model-b stays at 90
	df, err := utils.ParseDataFile(filePath)
	if err != nil {
		t.Fatal(err)
	}
	if len(df.Segments) == 0 {
		t.Fatal("no segments in rewritten file")
	}
	seen := map[string]bool{}
	for _, label := range df.Segments[0].Labels {
		seen[label.Filter] = true
		if label.Filter == "model-a" && label.Certainty != 100 {
			t.Errorf("model-a label should be 100, got %d", label.Certainty)
		}
		if label.Filter == "model-b" && label.Certainty != 90 {
			t.Errorf("model-b label should be unchanged at 90, got %d", label.Certainty)
		}
	}
	// Guard against the checks above passing vacuously.
	for _, name := range []string{"model-a", "model-b"} {
		if !seen[name] {
			t.Errorf("label for filter %q missing after push", name)
		}
	}
}
file addition: calls_push_certainty.go (----------)

[0.67281]

package calls

import (
"fmt"

"skraak/utils"
)

// PushCertaintyConfig holds the configuration for push-certainty
type PushCertaintyConfig struct {
// Folder and File are forwarded to LoadDataFiles to select the .data files.
Folder string
File string
// Filter, Species and CallType scope the push; an empty value places no
// restriction on the corresponding label field.
Filter string
Species string
CallType string
// Night, Day, Lat, Lng and Timezone are forwarded unchanged to LoadDataFiles.
// NOTE(review): presumably a time-of-day filter; confirm against LoadDataFiles.
Night bool
Day bool
Lat float64
Lng float64
Timezone string
// Reviewer is written to the metadata of every file that is changed.
Reviewer string
}

// PushCertaintyResult holds the result of push-certainty
type PushCertaintyResult struct {
// SegmentsUpdated is incremented once for every label promoted to certainty=100.
SegmentsUpdated int `json:"segments_updated"`
// FilesUpdated is the number of .data files rewritten.
FilesUpdated int `json:"files_updated"`
// TimeFilteredCount is copied from the loaded state's TimeFilteredCount.
TimeFilteredCount int `json:"time_filtered_count"`
}

// PushCertainty promotes all certainty=90 segments matching the filter scope to certainty=100.
// Uses identical filtering logic to LoadDataFiles so the scope matches calls classify exactly.
//
// Every data file in which at least one label was promoted gets its reviewer
// set to config.Reviewer and is written back to its own path. The
// SegmentsUpdated counter is incremented once per promoted label. A load or
// write failure aborts the run and is returned; files written before the
// failure stay written.
func PushCertainty(config PushCertaintyConfig) (*PushCertaintyResult, error) {
	state, err := LoadDataFiles(ClassifyConfig{
		Folder:    config.Folder,
		File:      config.File,
		Filter:    config.Filter,
		Species:   config.Species,
		CallType:  config.CallType,
		Certainty: 90,
		Sample:    -1,
		Night:     config.Night,
		Day:       config.Day,
		Lat:       config.Lat,
		Lng:       config.Lng,
		Timezone:  config.Timezone,
	})
	if err != nil {
		return nil, err
	}

	// Fetch the filtered segments once instead of once per data file; the
	// result is indexed in step with state.DataFiles.
	filtered := state.FilteredSegs()

	var segsUpdated, filesUpdated int
	for i, df := range state.DataFiles {
		changed := false
		for _, seg := range filtered[i] {
			for _, label := range seg.Labels {
				if labelMatchesPush(label, config.Filter, config.Species, config.CallType) {
					label.Certainty = 100
					changed = true
					segsUpdated++
				}
			}
		}
		// Only touch files on disk when something was actually promoted.
		if changed {
			df.Meta.Reviewer = config.Reviewer
			if err := df.Write(df.FilePath); err != nil {
				return nil, fmt.Errorf("write %s: %w", df.FilePath, err)
			}
			filesUpdated++
		}
	}

	return &PushCertaintyResult{
		SegmentsUpdated:   segsUpdated,
		FilesUpdated:      filesUpdated,
		TimeFilteredCount: state.TimeFilteredCount,
	}, nil
}

// labelMatchesPush reports whether label falls inside the push scope and
// still sits at certainty=90. An empty filter, species or callType places no
// restriction on that field. The certainty check is repeated here, even
// though LoadDataFiles already selected on it, so that only the specific
// label that matched is promoted (a segment may carry labels from several
// filters).
func labelMatchesPush(label *utils.Label, filter, species, callType string) bool {
	filterOK := filter == "" || label.Filter == filter
	speciesOK := species == "" || label.Species == species
	callTypeOK := callType == "" || label.CallType == callType
	return filterOK && speciesOK && callTypeOK && label.Certainty == 90
}
file addition: calls_propagate_test.go (----------)

[0.67281]

package calls

import (
"path/filepath"
"testing"

"skraak/utils"
)

// helpers

// seg builds a test segment covering start..end with a fixed 100-8000
// frequency band and the given labels.
func seg(start, end float64, labels ...*utils.Label) *utils.Segment {
	s := new(utils.Segment)
	s.StartTime, s.EndTime = start, end
	s.FreqLow, s.FreqHigh = 100, 8000
	s.Labels = labels
	return s
}

// lbl builds a test label from its filter, species, call type and certainty.
func lbl(filter, species, calltype string, certainty int) *utils.Label {
	l := new(utils.Label)
	l.Filter, l.Species, l.CallType = filter, species, calltype
	l.Certainty = certainty
	return l
}

// writeFile writes the segments to "test.data" inside a fresh temporary
// directory and returns the file's path. The fixture metadata is the one
// writeFileAt uses (operator "ML", reviewer "David", duration 3600).
func writeFile(t *testing.T, segs ...*utils.Segment) string {
	t.Helper()
	return writeFileAt(t, t.TempDir(), "test.data", segs...)
}

// readFile parses the .data file at path, failing the test immediately if
// it cannot be parsed.
func readFile(t *testing.T, path string) *utils.DataFile {
	t.Helper()
	parsed, err := utils.ParseDataFile(path)
	if err == nil {
		return parsed
	}
	t.Fatalf("parse %s: %v", path, err)
	return nil // not reached: Fatalf stops the test
}

// findLabel looks through the parsed file for a segment with exactly the
// given start and end time and returns its first label carrying filter.
// It returns nil when no such label exists.
func findLabel(df *utils.DataFile, filter string, start, end float64) *utils.Label {
	for _, segment := range df.Segments {
		if segment.StartTime == start && segment.EndTime == end {
			for _, label := range segment.Labels {
				if label.Filter == filter {
					return label
				}
			}
		}
	}
	return nil
}

// Filter names shared by the propagate tests: fFrom is passed as the
// FromFilter (source) and fTo as the ToFilter (target).
const (
fFrom = "opensoundscape-kiwi-1.2"
fTo = "opensoundscape-kiwi-1.5"
)

func TestPropagate_HappyPathSingle(t *testing.T) {
path := writeFile(t,
seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
)

out, err := CallsPropagate(CallsPropagateInput{
File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v (%s)", err, out.Error)
}
if out.Propagated != 1 || out.TargetsExamined != 1 || out.SkippedConflict != 0 || out.SkippedNoOverlap != 0 {
t.Fatalf("counts wrong: %+v", out)
}

df := readFile(t, path)
target := findLabel(df, fTo, 100, 125)
if target == nil {
t.Fatal("target label missing")
}
if target.Species != "Kiwi" || target.CallType != "Male" || target.Certainty != 90 {
t.Errorf("target not updated correctly: species=%q calltype=%q cert=%d", target.Species, target.CallType, target.Certainty)
}
if df.Meta.Reviewer != "Skraak" {
t.Errorf("reviewer = %q, want Skraak", df.Meta.Reviewer)
}
}

// TestPropagate_NoOverlap: a target with no overlapping source is counted
// as examined and skipped, and the file (label and reviewer) is left as it was.
func TestPropagate_NoOverlap(t *testing.T) {
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 0 || out.TargetsExamined != 1 || out.SkippedNoOverlap != 1 {
		t.Fatalf("counts wrong: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 500, 525)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.Certainty != 70 {
		t.Errorf("target should not be modified, cert=%d", target.Certainty)
	}
	if df.Meta.Reviewer != "David" {
		t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
	}
}

func TestPropagate_SourceWrongSpecies_Ignored(t *testing.T) {
path := writeFile(t,
seg(100, 125, lbl(fFrom, "Weka", "", 100)),
seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
)

out, err := CallsPropagate(CallsPropagateInput{
File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.Propagated != 0 || out.SkippedNoOverlap != 1 {
t.Fatalf("counts wrong: %+v", out)
}
}

func TestPropagate_SourceWrongCertainty_Ignored(t *testing.T) {
// cert=70 and cert=0 source labels must NOT count as sources.
path := writeFile(t,
seg(100, 125, lbl(fFrom, "Kiwi", "Male", 70)),
seg(200, 225, lbl(fFrom, "Don't Know", "", 0)),
seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
seg(200, 225, lbl(fTo, "Kiwi", "Male", 70)),
)

out, err := CallsPropagate(CallsPropagateInput{
File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.Propagated != 0 || out.SkippedNoOverlap != 2 {
t.Fatalf("counts wrong: %+v", out)
}
}

func TestPropagate_SourceWrongFilter_Ignored(t *testing.T) {
path := writeFile(t,
seg(100, 125, lbl("some-other-filter", "Kiwi", "Male", 100)),
seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
)

out, err := CallsPropagate(CallsPropagateInput{
File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if !out.FiltersMissing || out.Propagated != 0 || out.TargetsExamined != 0 {
t.Fatalf("expected FiltersMissing=true with zero counts, got: %+v", out)
}
}

func TestPropagate_TargetCert100_NotTouched(t *testing.T) {
// Target with cert=100 is human-verified — must NOT be overwritten.
path := writeFile(t,
seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
seg(100, 125, lbl(fTo, "Kiwi", "Male", 100)),
)

out, err := CallsPropagate(CallsPropagateInput{
File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.TargetsExamined != 0 || out.Propagated != 0 {
t.Fatalf("cert=100 target must not be examined: %+v", out)
}
df := readFile(t, path)
if df.Meta.Reviewer != "David" {
t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
}
}

// TestPropagate_TargetCert90_NotTouched: a target at cert=90 (already
// propagated earlier) must NOT be examined or re-propagated.
func TestPropagate_TargetCert90_NotTouched(t *testing.T) {
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Female", 90)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.TargetsExamined != 0 || out.Propagated != 0 {
		t.Fatalf("cert=90 target must not be examined: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.Certainty != 90 || target.CallType != "Female" {
		t.Errorf("cert=90 target was modified: %+v", target)
	}
}

func TestPropagate_TargetCert0_Propagated(t *testing.T) {
// Target at cert=0 ("Don't Know" / "Noise") SHOULD be propagated when an
// overlapping cert=100 source exists — rescues labels from the noise bucket
// so they surface for review even if occasionally wrong.
path := writeFile(t,
seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
seg(100, 125, lbl(fTo, "Don't Know", "", 0)),
seg(200, 225, lbl(fFrom, "Kiwi", "Female", 100)),
seg(200, 225, lbl(fTo, "Noise", "", 0)),
)

out, err := CallsPropagate(CallsPropagateInput{
File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.TargetsExamined != 2 || out.Propagated != 2 {
t.Fatalf("cert=0 targets must be propagated: %+v", out)
}
df := readFile(t, path)
for _, c := range []struct {
start, end float64
calltype string
}{{100, 125, "Male"}, {200, 225, "Female"}} {
l := findLabel(df, fTo, c.start, c.end)
if l == nil || l.Species != "Kiwi" || l.CallType != c.calltype || l.Certainty != 90 {
t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", c.start, c.end, l, c.calltype)
}
}
}

// TestPropagate_MultipleSourcesAgree: two overlapping sources with the same
// call type do not conflict, and the target receives that call type.
func TestPropagate_MultipleSourcesAgree(t *testing.T) {
	// Two overlapping sources with same calltype → propagate.
	path := writeFile(t,
		seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(105, 120, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 1 || out.SkippedConflict != 0 {
		t.Fatalf("counts wrong: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.CallType != "Male" {
		t.Errorf("calltype should be Male, got %q", target.CallType)
	}
}

// TestPropagate_MultipleSourcesConflict: two overlapping sources with
// different call types are reported as a conflict; the target and the
// reviewer stay unmodified.
func TestPropagate_MultipleSourcesConflict(t *testing.T) {
	// Two overlapping sources with different calltypes → conflict, skip, report.
	path := writeFile(t,
		seg(100, 110, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(115, 120, lbl(fFrom, "Kiwi", "Female", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 0 || out.SkippedConflict != 1 {
		t.Fatalf("expected 1 conflict skip: %+v", out)
	}
	if len(out.Conflicts) != 1 {
		t.Fatalf("expected 1 conflict report, got %d", len(out.Conflicts))
	}
	if out.Conflicts[0].TargetStart != 100 || out.Conflicts[0].TargetEnd != 125 {
		t.Errorf("conflict target wrong: %+v", out.Conflicts[0])
	}
	if len(out.Conflicts[0].SourceChoices) != 2 {
		t.Errorf("expected 2 source choices, got %d", len(out.Conflicts[0].SourceChoices))
	}
	// Target must NOT be modified.
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.CallType != "Duet" || target.Certainty != 70 {
		t.Errorf("conflicted target was modified: %+v", target)
	}
	if df.Meta.Reviewer != "David" {
		t.Errorf("reviewer should stay David (no write), got %q", df.Meta.Reviewer)
	}
}

// TestPropagate_EmptyCallTypePropagates: a source with an empty call type
// clears the target's call type while still setting species and certainty.
func TestPropagate_EmptyCallTypePropagates(t *testing.T) {
	// Source with empty calltype → target gets empty calltype.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Male", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 1 {
		t.Fatalf("expected propagated=1: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.CallType != "" {
		t.Errorf("calltype should be cleared, got %q", target.CallType)
	}
	if target.Species != "Kiwi" || target.Certainty != 90 {
		t.Errorf("target fields wrong: %+v", target)
	}
}

// TestPropagate_SpeciesOverride: a target whose species differs from the
// requested species is overwritten with the source's classification.
func TestPropagate_SpeciesOverride(t *testing.T) {
	// Target species was different from --species; must be overwritten.
	path := writeFile(t,
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Don't Know", "", 70)),
	)

	out, err := CallsPropagate(CallsPropagateInput{
		File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if out.Propagated != 1 {
		t.Fatalf("expected propagated=1: %+v", out)
	}
	df := readFile(t, path)
	target := findLabel(df, fTo, 100, 125)
	// Fail cleanly instead of panicking on a nil dereference below.
	if target == nil {
		t.Fatal("target label missing")
	}
	if target.Species != "Kiwi" || target.CallType != "Male" || target.Certainty != 90 {
		t.Errorf("target not overwritten correctly: %+v", target)
	}
}

func TestPropagate_OverlapBoundaryExclusive(t *testing.T) {
// Segments touching at a point (src ends exactly where tgt starts) do NOT overlap.
path := writeFile(t,
seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),
)

out, err := CallsPropagate(CallsPropagateInput{
File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.Propagated != 0 || out.SkippedNoOverlap != 1 {
t.Fatalf("touching boundary must not count as overlap: %+v", out)
}
}

func TestPropagate_OverlapPartial(t *testing.T) {
// 1-second overlap is enough.
path := writeFile(t,
seg(100, 126, lbl(fFrom, "Kiwi", "Male", 100)),
seg(125, 150, lbl(fTo, "Kiwi", "Duet", 70)),
)

out, err := CallsPropagate(CallsPropagateInput{
File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.Propagated != 1 {
t.Fatalf("expected propagated=1: %+v", out)
}
}

// TestPropagate_SupersetEitherDirection: containment counts as overlap
// whichever of source and target is the larger segment.
func TestPropagate_SupersetEitherDirection(t *testing.T) {
	// Source engulfs target.
	path1 := writeFile(t,
		seg(100, 200, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(110, 150, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err := CallsPropagate(CallsPropagateInput{File: path1, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"})
	// Surface the error rather than discarding it; the count check still runs.
	if err != nil {
		t.Errorf("source-engulfs-target: unexpected error: %v", err)
	}
	if out.Propagated != 1 {
		t.Errorf("source-engulfs-target: %+v", out)
	}

	// Target engulfs source.
	path2 := writeFile(t,
		seg(110, 150, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 200, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	out, err = CallsPropagate(CallsPropagateInput{File: path2, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"})
	if err != nil {
		t.Errorf("target-engulfs-source: unexpected error: %v", err)
	}
	if out.Propagated != 1 {
		t.Errorf("target-engulfs-source: %+v", out)
	}
}

// TestPropagate_MissingFlags: leaving out any one of file, from-filter,
// to-filter or species must produce an error.
func TestPropagate_MissingFlags(t *testing.T) {
	// without starts from a complete input and blanks a single field.
	without := func(clear func(*CallsPropagateInput)) CallsPropagateInput {
		in := CallsPropagateInput{File: "x", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}
		clear(&in)
		return in
	}

	cases := []struct {
		name string
		in   CallsPropagateInput
	}{
		{"no file", without(func(in *CallsPropagateInput) { in.File = "" })},
		{"no from", without(func(in *CallsPropagateInput) { in.FromFilter = "" })},
		{"no to", without(func(in *CallsPropagateInput) { in.ToFilter = "" })},
		{"no species", without(func(in *CallsPropagateInput) { in.Species = "" })},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if _, err := CallsPropagate(tc.in); err == nil {
				t.Errorf("expected error")
			}
		})
	}
}

func TestPropagate_SameFromAndTo(t *testing.T) {
_, err := CallsPropagate(CallsPropagateInput{
File: "x", FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi",
})
if err == nil {
t.Error("expected error when --from == --to")
}
}

func TestPropagate_NonexistentFile(t *testing.T) {
_, err := CallsPropagate(CallsPropagateInput{
File: "/nonexistent/path.data", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err == nil {
t.Error("expected error for nonexistent file")
}
}

func TestPropagate_RealisticMixed(t *testing.T) {
// Mimics the 20260228_211500.WAV.data case: cert=0 "Don't Know" and cert=100 Kiwi sources
// coexist; only cert=100 Kiwi gets propagated.
path := writeFile(t,
// Sources (kiwi-1.2)
seg(45, 52.5, lbl(fFrom, "Don't Know", "", 0)),
seg(142.5, 177.5, lbl(fFrom, "Kiwi", "Male", 100)),
seg(195, 217.5, lbl(fFrom, "Don't Know", "", 0)),
seg(647.5, 682.5, lbl(fFrom, "Kiwi", "Female", 100)),
seg(815, 855, lbl(fFrom, "Kiwi", "Duet", 100)),
// Targets (kiwi-1.5)
seg(147.5, 167.5, lbl(fTo, "Kiwi", "Male", 70)),
seg(647.5, 672.5, lbl(fTo, "Kiwi", "Female", 70)),
seg(815, 852.5, lbl(fTo, "Kiwi", "Duet", 70)),
)

out, err := CallsPropagate(CallsPropagateInput{
File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.TargetsExamined != 3 || out.Propagated != 3 || out.SkippedConflict != 0 {
t.Fatalf("counts wrong: %+v", out)
}
df := readFile(t, path)
expect := []struct {
start, end float64
calltype string
}{
{147.5, 167.5, "Male"},
{647.5, 672.5, "Female"},
{815, 852.5, "Duet"},
}
for _, e := range expect {
l := findLabel(df, fTo, e.start, e.end)
if l == nil || l.Certainty != 90 || l.CallType != e.calltype || l.Species != "Kiwi" {
t.Errorf("at %v-%v got %+v, want Kiwi+%s cert=90", e.start, e.end, l, e.calltype)
}
}
}

func TestPropagate_NoWriteIfNothingChanged(t *testing.T) {
// File with only non-target segments should not be rewritten (reviewer unchanged).
path := writeFile(t,
seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
)

out, err := CallsPropagate(CallsPropagateInput{
File: path, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.Propagated != 0 || out.TargetsExamined != 0 {
t.Fatalf("expected no activity: %+v", out)
}
df := readFile(t, path)
if df.Meta.Reviewer != "David" {
t.Errorf("reviewer should not be touched, got %q", df.Meta.Reviewer)
}
}

// writeFileAt writes a fixture .data file named base (must end in .data)
// into the existing directory dir and returns its full path. The fixture
// metadata is fixed: operator "ML", reviewer "David", duration 3600.
func writeFileAt(t *testing.T, dir, base string, segs ...*utils.Segment) string {
	t.Helper()

	meta := &utils.DataMeta{Operator: "ML", Reviewer: "David", Duration: 3600}
	fixture := &utils.DataFile{Meta: meta, Segments: segs}

	target := filepath.Join(dir, base)
	err := fixture.Write(target)
	if err != nil {
		t.Fatalf("write fixture: %v", err)
	}
	return target
}

// assertPropagateStats compares the file and target counters of got with
// want and reports every mismatch by field name. SkippedConflict is not
// part of the comparison.
func assertPropagateStats(t *testing.T, got, want CallsPropagateFolderOutput) {
	t.Helper()

	check := func(name string, gotN, wantN int) {
		t.Helper()
		if gotN != wantN {
			t.Errorf("%s: got %d, want %d", name, gotN, wantN)
		}
	}

	check("FilesTotal", got.FilesTotal, want.FilesTotal)
	check("FilesWithBothFilters", got.FilesWithBothFilters, want.FilesWithBothFilters)
	check("FilesSkippedNoFilter", got.FilesSkippedNoFilter, want.FilesSkippedNoFilter)
	check("FilesChanged", got.FilesChanged, want.FilesChanged)
	check("FilesErrored", got.FilesErrored, want.FilesErrored)
	check("TargetsExamined", got.TargetsExamined, want.TargetsExamined)
	check("Propagated", got.Propagated, want.Propagated)
	check("SkippedNoOverlap", got.SkippedNoOverlap, want.SkippedNoOverlap)
}

// TestPropagateFolder_AggregatesAndSkipsMissing runs a folder propagation
// over four files and checks the aggregated counters as well as the
// resulting state of the individual files.
func TestPropagateFolder_AggregatesAndSkipsMissing(t *testing.T) {
	dir := t.TempDir()

	// File A: both filters present, one clean propagation.
	pathA := writeFileAt(t, dir, "a.wav.data",
		seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(100, 125, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	// File B: only target filter — missing source, must be skipped silently.
	pathB := writeFileAt(t, dir, "b.wav.data",
		seg(200, 225, lbl(fTo, "Kiwi", "Duet", 70)),
	)
	// File C: only source filter — missing target, must be skipped silently.
	_ = writeFileAt(t, dir, "c.wav.data",
		seg(300, 325, lbl(fFrom, "Kiwi", "Male", 100)),
	)
	// File D: both filters, but no overlap → targets examined, none propagated.
	pathD := writeFileAt(t, dir, "d.wav.data",
		seg(400, 425, lbl(fFrom, "Kiwi", "Male", 100)),
		seg(500, 525, lbl(fTo, "Kiwi", "Duet", 70)),
	)

	in := CallsPropagateFolderInput{Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"}
	out, err := CallsPropagateFolder(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	want := CallsPropagateFolderOutput{
		FilesTotal:           4,
		FilesWithBothFilters: 2,
		FilesSkippedNoFilter: 2,
		FilesChanged:         1,
		FilesErrored:         0,
		TargetsExamined:      2,
		Propagated:           1,
		SkippedNoOverlap:     1,
	}
	assertPropagateStats(t, out, want)

	t.Run("file_a_propagated", func(t *testing.T) {
		parsed := readFile(t, pathA)
		if got := parsed.Meta.Reviewer; got != "Skraak" {
			t.Errorf("reviewer: got %q, want Skraak", got)
		}
		label := findLabel(parsed, fTo, 100, 125)
		if !(label != nil && label.Certainty == 90 && label.CallType == "Male") {
			t.Errorf("target label: got %+v, want cert=90 calltype=Male", label)
		}
	})

	t.Run("file_b_skipped", func(t *testing.T) {
		if got := readFile(t, pathB).Meta.Reviewer; got != "David" {
			t.Errorf("reviewer should not be touched, got %q", got)
		}
	})

	t.Run("file_d_no_overlap", func(t *testing.T) {
		parsed := readFile(t, pathD)
		if got := parsed.Meta.Reviewer; got != "David" {
			t.Errorf("reviewer should not be touched, got %q", got)
		}
		label := findLabel(parsed, fTo, 500, 525)
		if !(label != nil && label.Certainty == 70) {
			t.Errorf("target label should be unchanged cert=70, got %+v", label)
		}
	})
}

func TestPropagateFolder_EmptyFolder(t *testing.T) {
dir := t.TempDir()
out, err := CallsPropagateFolder(CallsPropagateFolderInput{
Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.FilesTotal != 0 || out.Propagated != 0 {
t.Errorf("expected empty result, got %+v", out)
}
}

// TestPropagateFolder_MissingRequiredFlags: each input lacks one required
// field, or uses the same filter for source and target, and must be rejected.
func TestPropagateFolder_MissingRequiredFlags(t *testing.T) {
	dir := t.TempDir()
	cases := []CallsPropagateFolderInput{
		{FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi"},                // no folder
		{Folder: dir, ToFilter: fTo, Species: "Kiwi"},                      // no from-filter
		{Folder: dir, FromFilter: fFrom, Species: "Kiwi"},                  // no to-filter
		{Folder: dir, FromFilter: fFrom, ToFilter: fTo},                    // no species
		{Folder: dir, FromFilter: fFrom, ToFilter: fFrom, Species: "Kiwi"}, // from == to
	}
	for idx := 0; idx < len(cases); idx++ {
		_, err := CallsPropagateFolder(cases[idx])
		if err != nil {
			continue
		}
		t.Errorf("case %d: expected error for input %+v", idx, cases[idx])
	}
}

func TestPropagateFolder_NonexistentFolder(t *testing.T) {
_, err := CallsPropagateFolder(CallsPropagateFolderInput{
Folder: "/nonexistent/path/xyz", FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err == nil {
t.Fatal("expected error for nonexistent folder")
}
}

func TestPropagateFolder_ConflictsTaggedWithFile(t *testing.T) {
dir := t.TempDir()
// Two sources with different calltypes both overlapping one target.
writeFileAt(t, dir, "conflict.wav.data",
seg(100, 125, lbl(fFrom, "Kiwi", "Male", 100)),
seg(110, 130, lbl(fFrom, "Kiwi", "Female", 100)),
seg(100, 130, lbl(fTo, "Kiwi", "", 70)),
)

out, err := CallsPropagateFolder(CallsPropagateFolderInput{
Folder: dir, FromFilter: fFrom, ToFilter: fTo, Species: "Kiwi",
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if out.SkippedConflict != 1 || len(out.Conflicts) != 1 {
t.Fatalf("expected one conflict, got %+v", out)
}
if out.Conflicts[0].File == "" {
t.Errorf("conflict should be tagged with file path, got %+v", out.Conflicts[0])
}
}
file addition: calls_propagate.go (----------)

[0.67281]

package calls

import (
"fmt"
"os"

"skraak/utils"
)

// CallsPropagateInput is the input for CallsPropagate, which works on a
// single .data file.
type CallsPropagateInput struct {
// File is the path of the .data file to parse.
File string `json:"file"`
// FromFilter names the filter whose labels act as sources.
FromFilter string `json:"from_filter"`
// ToFilter names the filter whose labels are the targets.
ToFilter string `json:"to_filter"`
// Species restricts which source labels are considered.
Species string `json:"species"`
}

// CallsPropagateOutput is the result of CallsPropagate for one file.
// NOTE(review): the code that fills the counters lies outside this view;
// the counter descriptions follow the field names and the package tests.
type CallsPropagateOutput struct {
// File, FromFilter, ToFilter and Species echo the input values.
File string `json:"file"`
FromFilter string `json:"from_filter"`
ToFilter string `json:"to_filter"`
Species string `json:"species"`
// FiltersMissing is set when the file does not contain both filters; the
// file is then skipped without error.
FiltersMissing bool `json:"filters_missing,omitempty"`
// TargetsExamined counts the target labels considered for propagation.
TargetsExamined int `json:"targets_examined"`
// Propagated counts the target labels that were updated.
Propagated int `json:"propagated"`
// SkippedNoOverlap counts examined targets without an overlapping source.
SkippedNoOverlap int `json:"skipped_no_overlap"`
// SkippedConflict counts examined targets whose overlapping sources disagree.
SkippedConflict int `json:"skipped_conflict"`
// Conflicts describes each skipped conflict.
Conflicts []PropagateConflict `json:"conflicts,omitempty"`
// Changes presumably lists the applied updates — TODO confirm.
Changes []PropagateChange `json:"changes,omitempty"`
// Error holds the failure message when an error is returned.
Error string `json:"error,omitempty"`
}

// CallsPropagateFolderInput is the input for CallsPropagateFolder. It
// mirrors CallsPropagateInput with a folder in place of a single file.
type CallsPropagateFolderInput struct {
// Folder is the directory whose .data files are processed.
Folder string `json:"folder"`
// FromFilter names the source filter.
FromFilter string `json:"from_filter"`
// ToFilter names the target filter.
ToFilter string `json:"to_filter"`
// Species is the species to propagate.
Species string `json:"species"`
}

// CallsPropagateFolderOutput is the aggregated result of CallsPropagateFolder.
// NOTE(review): the aggregation code lies outside this view; the field
// descriptions follow the field names and the package tests.
type CallsPropagateFolderOutput struct {
// Folder, FromFilter, ToFilter and Species presumably echo the input — TODO confirm.
Folder string `json:"folder"`
FromFilter string `json:"from_filter"`
ToFilter string `json:"to_filter"`
Species string `json:"species"`
// FilesTotal is the number of .data files found in the folder.
FilesTotal int `json:"files_total"`
// FilesWithBothFilters counts files that contain both filters.
FilesWithBothFilters int `json:"files_with_both_filters"`
// FilesSkippedNoFilter counts files skipped because a filter is missing.
FilesSkippedNoFilter int `json:"files_skipped_no_filter"`
// FilesChanged counts files in which at least one label was propagated.
FilesChanged int `json:"files_changed"`
// FilesErrored presumably counts files whose processing failed — TODO confirm.
FilesErrored int `json:"files_errored"`
// The four counters below are summed over all processed files.
TargetsExamined int `json:"targets_examined"`
Propagated int `json:"propagated"`
SkippedNoOverlap int `json:"skipped_no_overlap"`
SkippedConflict int `json:"skipped_conflict"`
// Conflicts collects the per-file conflicts, each tagged with its file path.
Conflicts []PropagateConflict `json:"conflicts,omitempty"`
// Errors presumably holds the per-file outputs of failed files — TODO confirm.
Errors []CallsPropagateOutput `json:"errors,omitempty"`
// Error holds the failure message when the folder run itself fails.
Error string `json:"error,omitempty"`
}

// PropagateConflict describes a target segment that was skipped because the
// overlapping source labels did not agree on a single call type.
type PropagateConflict struct {
// File is filled in by CallsPropagateFolder; the single-file path leaves it empty.
File string `json:"file,omitempty"`
TargetStart float64 `json:"target_start"` // start time of the target segment
TargetEnd float64 `json:"target_end"` // end time of the target segment
TargetCallType string `json:"target_calltype,omitempty"` // call type currently on the target label
SourceChoices []PropagateSourceChoice `json:"source_choices"` // the disagreeing overlapping sources
}

// PropagateSourceChoice is one overlapping source listed inside a PropagateConflict.
type PropagateSourceChoice struct {
Start float64 `json:"start"` // start time of the source segment
End float64 `json:"end"` // end time of the source segment
Species string `json:"species"` // species on the source label
CallType string `json:"calltype,omitempty"` // call type on the source label
}

// PropagateChange records the before and after values of one target label
// that was rewritten by propagation.
type PropagateChange struct {
TargetStart float64 `json:"target_start"` // start time of the target segment
TargetEnd float64 `json:"target_end"` // end time of the target segment
PrevSpecies string `json:"prev_species"` // species before the update
PrevCallType string `json:"prev_calltype,omitempty"` // call type before the update
PrevCertainty int `json:"prev_certainty"` // certainty before the update
NewSpecies string `json:"new_species"` // species written to the label
NewCallType string `json:"new_calltype,omitempty"` // call type written to the label
NewCertainty int `json:"new_certainty"` // certainty written to the label
}

// CallsPropagate copies verified classifications (certainty==100) from one filter's
// segments to overlapping target segments of another filter, within a single .data file.
// Target labels with certainty==70 (ML-unverified) or certainty==0 (Don't Know / Noise)
// are updated — targets at certainty==100 (human-verified) and certainty==90 (already
// propagated) are left alone. Only source labels matching --species are considered.
// Propagated target labels are set to certainty=90 and file reviewer is set to "Skraak".
//
// The file is rewritten only when at least one label was propagated. On failure
// the returned output carries the error text in its Error field, and the
// returned error wraps the underlying parse or write error so callers can
// inspect it with errors.Is / errors.As.
func CallsPropagate(input CallsPropagateInput) (CallsPropagateOutput, error) {
	output := CallsPropagateOutput{
		File:       input.File,
		FromFilter: input.FromFilter,
		ToFilter:   input.ToFilter,
		Species:    input.Species,
	}

	if err := validatePropagateInput(&output, input); err != nil {
		return output, err
	}

	df, err := utils.ParseDataFile(input.File)
	if err != nil {
		// %w keeps the cause reachable; the rendered text is the same as with %v.
		wrapped := fmt.Errorf("parse %s: %w", input.File, err)
		output.Error = wrapped.Error()
		return output, wrapped
	}

	// Fast path: skip files that don't contain both filters at all.
	if !hasBothFilters(df, input.FromFilter, input.ToFilter) {
		output.FiltersMissing = true
		return output, nil
	}

	sources := collectPropagateSources(df, input.FromFilter, input.Species)

	propagateTargets(df, sources, input, &output)

	// Leave the file untouched unless something actually changed.
	if output.Propagated > 0 {
		df.Meta.Reviewer = "Skraak"
		if err := df.Write(input.File); err != nil {
			wrapped := fmt.Errorf("write %s: %w", input.File, err)
			output.Error = wrapped.Error()
			return output, wrapped
		}
	}

	return output, nil
}

// validatePropagateInput verifies that all four flags are present, that the
// two filters differ, and that the input file exists. The first failure is
// stored in output.Error and returned as an error with the same text.
func validatePropagateInput(output *CallsPropagateOutput, input CallsPropagateInput) error {
	// reject records the message on the output and turns it into the error.
	reject := func(msg string) error {
		output.Error = msg
		return fmt.Errorf("%s", msg)
	}

	switch {
	case input.File == "":
		return reject("--file is required")
	case input.FromFilter == "":
		return reject("--from is required")
	case input.ToFilter == "":
		return reject("--to is required")
	case input.Species == "":
		return reject("--species is required")
	case input.FromFilter == input.ToFilter:
		return reject("--from and --to must differ")
	}

	// Only a definite "does not exist" is rejected here.
	if _, statErr := os.Stat(input.File); os.IsNotExist(statErr) {
		return reject(fmt.Sprintf("file not found: %s", input.File))
	}
	return nil
}

// hasBothFilters reports whether at least one label in the data file carries
// fromFilter and at least one label carries toFilter. It stops scanning as
// soon as both have been seen.
func hasBothFilters(df *utils.DataFile, fromFilter, toFilter string) bool {
	var seenFrom, seenTo bool
	for _, segment := range df.Segments {
		for _, label := range segment.Labels {
			seenFrom = seenFrom || label.Filter == fromFilter
			seenTo = seenTo || label.Filter == toFilter
			if seenFrom && seenTo {
				return true
			}
		}
	}
	return false
}

// sourceRef pairs a segment with the label inside it that qualified as a
// propagation source.
type sourceRef struct {
seg *utils.Segment // segment that holds the source label
label *utils.Label // the matching label within seg
}

// collectPropagateSources returns one sourceRef per segment that has a label
// with the given filter and species at certainty 100. Only the first such
// label in each segment is used.
func collectPropagateSources(df *utils.DataFile, fromFilter, species string) []sourceRef {
	var refs []sourceRef
	for _, segment := range df.Segments {
		for i := 0; i < len(segment.Labels); i++ {
			candidate := segment.Labels[i]
			if candidate.Filter != fromFilter || candidate.Species != species || candidate.Certainty != 100 {
				continue
			}
			refs = append(refs, sourceRef{seg: segment, label: candidate})
			break // at most one source per segment
		}
	}
	return refs
}

// propagateTargets walks every segment of the data file. For each segment with
// an updatable target label it looks up the overlapping sources, then either
// applies the agreed classification or records why the target was skipped.
// All counters and records are accumulated on output.
func propagateTargets(df *utils.DataFile, sources []sourceRef, input CallsPropagateInput, output *CallsPropagateOutput) {
	for _, target := range df.Segments {
		label := findUpdatableTargetLabel(target.Labels, input.ToFilter)
		if label == nil {
			continue
		}
		output.TargetsExamined++

		matches := findOverlappingSources(sources, target)
		if len(matches) == 0 {
			output.SkippedNoOverlap++
			continue
		}

		callType, disagree := resolveCallType(matches)
		if !disagree {
			applyPropagation(label, input.Species, callType, target, output)
			continue
		}

		// Sources disagree on call type: leave the target alone and report it.
		output.SkippedConflict++
		output.Conflicts = append(output.Conflicts, buildConflictRecord(target, label, matches))
	}
}

// findUpdatableTargetLabel returns the first label that belongs to toFilter
// and has certainty 0 or 70, or nil when the slice holds no such label.
func findUpdatableTargetLabel(labels []*utils.Label, toFilter string) *utils.Label {
	for i := range labels {
		candidate := labels[i]
		if candidate.Filter != toFilter {
			continue
		}
		switch candidate.Certainty {
		case 0, 70:
			return candidate
		}
	}
	return nil
}

// findOverlappingSources filters sources down to those whose segment time
// range strictly intersects the target segment's range. Ranges that merely
// touch at an endpoint are not treated as overlapping.
func findOverlappingSources(sources []sourceRef, tSeg *utils.Segment) []sourceRef {
	var hits []sourceRef
	for i := range sources {
		src := sources[i]
		overlapping := src.seg.StartTime < tSeg.EndTime && tSeg.StartTime < src.seg.EndTime
		if !overlapping {
			continue
		}
		hits = append(hits, src)
	}
	return hits
}

// resolveCallType compares the call type of every overlapping source with the
// first one. It returns that call type and false when they all match, or an
// empty string and true as soon as one differs. The slice must be non-empty.
func resolveCallType(overlaps []sourceRef) (string, bool) {
	reference := overlaps[0].label.CallType
	for i := 1; i < len(overlaps); i++ {
		if overlaps[i].label.CallType != reference {
			return "", true
		}
	}
	return reference, false
}

// buildConflictRecord assembles the PropagateConflict for a skipped target:
// the target's time range and current call type, plus one source choice per
// overlapping source. The File field is left empty.
func buildConflictRecord(tSeg *utils.Segment, toLabel *utils.Label, overlaps []sourceRef) PropagateConflict {
	record := PropagateConflict{
		TargetStart:    tSeg.StartTime,
		TargetEnd:      tSeg.EndTime,
		TargetCallType: toLabel.CallType,
		SourceChoices:  make([]PropagateSourceChoice, len(overlaps)),
	}
	for i, src := range overlaps {
		record.SourceChoices[i] = PropagateSourceChoice{
			Start:    src.seg.StartTime,
			End:      src.seg.EndTime,
			Species:  src.label.Species,
			CallType: src.label.CallType,
		}
	}
	return record
}

// applyPropagation overwrites the target label's species, call type and
// certainty (set to 90), increments output.Propagated, and appends a
// PropagateChange that captures the label's previous and new values.
func applyPropagation(toLabel *utils.Label, species, callType string, tSeg *utils.Segment, output *CallsPropagateOutput) {
	const propagatedCertainty = 90

	// Record the old values before they are overwritten below.
	output.Changes = append(output.Changes, PropagateChange{
		TargetStart:   tSeg.StartTime,
		TargetEnd:     tSeg.EndTime,
		PrevSpecies:   toLabel.Species,
		PrevCallType:  toLabel.CallType,
		PrevCertainty: toLabel.Certainty,
		NewSpecies:    species,
		NewCallType:   callType,
		NewCertainty:  propagatedCertainty,
	})
	output.Propagated++

	toLabel.Species, toLabel.CallType, toLabel.Certainty = species, callType, propagatedCertainty
}

// CallsPropagateFolder applies CallsPropagate to each .data file that
// utils.FindDataFiles returns for the folder and sums the per-file counters.
// A file lacking either filter is counted in files_skipped_no_filter and
// otherwise ignored. A file whose CallsPropagate call fails is counted in
// files_errored and its output is kept in Errors; the run carries on with the
// remaining files. Only invalid input or a folder that cannot be listed
// produces a returned error.
func CallsPropagateFolder(input CallsPropagateFolderInput) (CallsPropagateFolderOutput, error) {
	output := CallsPropagateFolderOutput{
		Folder:     input.Folder,
		FromFilter: input.FromFilter,
		ToFilter:   input.ToFilter,
		Species:    input.Species,
	}
	// abort records the message on the output and returns it with a matching error.
	abort := func(msg string) (CallsPropagateFolderOutput, error) {
		output.Error = msg
		return output, fmt.Errorf("%s", msg)
	}

	required := []struct {
		value   string
		message string
	}{
		{input.Folder, "--folder is required"},
		{input.FromFilter, "--from is required"},
		{input.ToFilter, "--to is required"},
		{input.Species, "--species is required"},
	}
	for _, req := range required {
		if req.value == "" {
			return abort(req.message)
		}
	}
	if input.FromFilter == input.ToFilter {
		return abort("--from and --to must differ")
	}

	info, err := os.Stat(input.Folder)
	switch {
	case err != nil:
		return abort(fmt.Sprintf("folder not found: %s", input.Folder))
	case !info.IsDir():
		return abort(fmt.Sprintf("not a directory: %s", input.Folder))
	}

	dataFiles, err := utils.FindDataFiles(input.Folder)
	if err != nil {
		return abort(fmt.Sprintf("list .data files: %v", err))
	}
	output.FilesTotal = len(dataFiles)

	for _, dataPath := range dataFiles {
		perFile, err := CallsPropagate(CallsPropagateInput{
			File:       dataPath,
			FromFilter: input.FromFilter,
			ToFilter:   input.ToFilter,
			Species:    input.Species,
		})
		switch {
		case err != nil:
			output.FilesErrored++
			output.Errors = append(output.Errors, perFile)
			continue
		case perFile.FiltersMissing:
			output.FilesSkippedNoFilter++
			continue
		}

		output.FilesWithBothFilters++
		if perFile.Propagated > 0 {
			output.FilesChanged++
		}
		output.TargetsExamined += perFile.TargetsExamined
		output.Propagated += perFile.Propagated
		output.SkippedNoOverlap += perFile.SkippedNoOverlap
		output.SkippedConflict += perFile.SkippedConflict

		// Tag each conflict with its file so the merged list stays traceable.
		for i := range perFile.Conflicts {
			conflict := perFile.Conflicts[i]
			conflict.File = dataPath
			output.Conflicts = append(output.Conflicts, conflict)
		}
	}

	return output, nil
}
file addition: calls_modify_test.go (----------)

[0.67281]

package calls

import (
"path/filepath"
"testing"

"skraak/utils"
)

// TestCallsModifyBookmark checks that requesting a bookmark on a label that is
// already bookmarked (with all other values unchanged) is reported as
// "no changes needed" and leaves the stored bookmark set.
func TestCallsModifyBookmark(t *testing.T) {
	dataPath := filepath.Join(t.TempDir(), "test.data")

	// Seed one segment whose label is already bookmarked.
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: true}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	seed := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := seed.Write(dataPath); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Ask for exactly the state the label is already in.
	wantBookmark := true
	result, err := CallsModify(CallsModifyInput{
		File:      dataPath,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 80,
		Bookmark:  &wantBookmark,
	})
	if err == nil {
		t.Errorf("expected error 'no changes needed' when bookmark already true, got nil")
	}
	if result.Error != "No changes needed: all values already match" {
		t.Errorf("expected 'no changes needed' error, got: %s", result.Error)
	}

	// The stored bookmark must be unaffected.
	reloaded, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if stored := reloaded.Segments[0].Labels[0]; !stored.Bookmark {
		t.Errorf("bookmark should still be true, got false")
	}
}

// TestCallsModifyBookmarkFalse checks that requesting a bookmark on a label
// that is not bookmarked sets it, both in the returned output and in the file.
func TestCallsModifyBookmarkFalse(t *testing.T) {
	dataPath := filepath.Join(t.TempDir(), "test.data")

	// Seed one segment whose label has no bookmark.
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", CallType: "Duet", Bookmark: false}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	seed := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := seed.Write(dataPath); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	wantBookmark := true
	result, err := CallsModify(CallsModifyInput{
		File:      dataPath,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 80,
		Bookmark:  &wantBookmark,
	})
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if result.Bookmark == nil || !*result.Bookmark {
		t.Errorf("expected bookmark=true in result, got %v", result.Bookmark)
	}

	// The change must have been persisted.
	reloaded, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if stored := reloaded.Segments[0].Labels[0]; !stored.Bookmark {
		t.Errorf("bookmark should be true, got false")
	}
}

// TestCallsModifyCommentAdditive checks that a new comment is appended to an
// existing one with " | " as the separator, in the output and in the file.
func TestCallsModifyCommentAdditive(t *testing.T) {
	const wantComment = "First observation | Good example"
	dataPath := filepath.Join(t.TempDir(), "test.data")

	// Seed one segment whose label already has a comment.
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: "First observation"}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	seed := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := seed.Write(dataPath); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	result, err := CallsModify(CallsModifyInput{
		File:      dataPath,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 80,
		Comment:   "Good example",
	})
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if result.Comment != wantComment {
		t.Errorf("expected comment=%q, got %q", wantComment, result.Comment)
	}

	// The combined comment must have been persisted.
	reloaded, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if stored := reloaded.Segments[0].Labels[0].Comment; stored != wantComment {
		t.Errorf("expected comment in file=%q, got %q", wantComment, stored)
	}
}

// TestCallsModifyCommentAdditiveMultiple adds three comments one after another
// to a label that starts without a comment and checks that the final output
// holds all three joined with " | ".
func TestCallsModifyCommentAdditiveMultiple(t *testing.T) {
	dataPath := filepath.Join(t.TempDir(), "test.data")

	// Seed one segment whose label has no comment yet.
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter"}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	seed := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := seed.Write(dataPath); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Each step appends one comment; the last result is checked afterwards.
	steps := []struct {
		comment string
		failFmt string
	}{
		{"First", "unexpected error on first comment: %v"},
		{"Second", "unexpected error on second comment: %v"},
		{"Third", "unexpected error on third comment: %v"},
	}
	var result CallsModifyOutput
	for _, step := range steps {
		var err error
		result, err = CallsModify(CallsModifyInput{
			File:      dataPath,
			Reviewer:  "tester",
			Filter:    "myfilter",
			Segment:   "10-15",
			Certainty: 80,
			Comment:   step.comment,
		})
		if err != nil {
			t.Fatalf(step.failFmt, err)
		}
	}

	wantComment := "First | Second | Third"
	if result.Comment != wantComment {
		t.Errorf("expected comment=%q, got %q", wantComment, result.Comment)
	}
}

// TestCallsModifyCommentTooLong checks that appending a comment which pushes
// the combined text past 140 characters fails, and that the comment stored in
// the file is left as it was.
func TestCallsModifyCommentTooLong(t *testing.T) {
	const (
		storedComment = "This is a fairly long existing comment that takes up space"
		addedComment  = "This is another very long comment that when combined with the existing one will exceed the limit"
	)
	dataPath := filepath.Join(t.TempDir(), "test.data")

	// Seed one segment whose label already carries a longish comment.
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Comment: storedComment}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	seed := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := seed.Write(dataPath); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	result, err := CallsModify(CallsModifyInput{
		File:      dataPath,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 80,
		Comment:   addedComment,
	})
	if err == nil {
		t.Errorf("expected error for combined comment exceeding 140 chars, got nil")
	}
	if result.Error == "" {
		t.Errorf("expected error message, got empty")
	}

	// The rejected call must not have touched the file.
	reloaded, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if got := reloaded.Segments[0].Labels[0].Comment; got != storedComment {
		t.Errorf("original comment should be preserved, got %q", got)
	}
}

// TestCallsModifyPreservesBookmarkOnOtherChange changes only the certainty of
// a bookmarked label (no Bookmark in the input) and checks that the output
// reports no bookmark change and the stored bookmark stays set.
func TestCallsModifyPreservesBookmarkOnOtherChange(t *testing.T) {
	dataPath := filepath.Join(t.TempDir(), "test.data")

	// Seed one segment whose label is bookmarked.
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter", Bookmark: true}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	seed := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := seed.Write(dataPath); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// Bookmark is deliberately left nil: only the certainty differs.
	request := CallsModifyInput{
		File:      dataPath,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "10-15",
		Certainty: 100,
	}
	result, err := CallsModify(request)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if result.Bookmark != nil {
		t.Errorf("bookmark should not be in output when not changed, got %v", result.Bookmark)
	}

	reloaded, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse file: %v", err)
	}
	if stored := reloaded.Segments[0].Labels[0]; !stored.Bookmark {
		t.Errorf("bookmark should still be true after changing certainty, got false")
	}
}

// TestCallsModifyInvalidSegment checks that asking for a time range that
// matches no segment in the file yields an error and a non-empty Error field.
func TestCallsModifyInvalidSegment(t *testing.T) {
	dataPath := filepath.Join(t.TempDir(), "test.data")

	// Seed a single segment covering 10-15 seconds.
	label := &utils.Label{Species: "Kiwi", Certainty: 80, Filter: "myfilter"}
	segment := &utils.Segment{
		StartTime: 10.0,
		EndTime:   15.0,
		FreqLow:   100,
		FreqHigh:  5000,
		Labels:    []*utils.Label{label},
	}
	seed := &utils.DataFile{
		Meta:     &utils.DataMeta{Operator: "test", Duration: 60},
		Segments: []*utils.Segment{segment},
	}
	if err := seed.Write(dataPath); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	// 99-100 does not correspond to any seeded segment.
	request := CallsModifyInput{
		File:      dataPath,
		Reviewer:  "tester",
		Filter:    "myfilter",
		Segment:   "99-100",
		Certainty: 80,
	}
	result, err := CallsModify(request)
	if err == nil {
		t.Errorf("expected error for non-existent segment, got nil")
	}
	if result.Error == "" {
		t.Errorf("expected error message, got empty")
	}
}
file addition: calls_modify.go (----------)

[0.67281]

package calls

import (
	"fmt"
	"math"
	"os"
	"strconv"
	"strings"

	"skraak/utils"
)

// CallsModifyInput defines the input for the modify tool
type CallsModifyInput struct {
File string `json:"file"` // path of the .data file to modify (required)
Reviewer string `json:"reviewer"` // written to the file's reviewer metadata (required)
Filter string `json:"filter"` // selects which label within the segment is modified (required)
Segment string `json:"segment"` // "start-end" in whole seconds, e.g. "12-15" (required)
Certainty int `json:"certainty"` // certainty written to the label
Species string `json:"species"` // "species" or "species+calltype"; empty keeps the label's current values
Bookmark *bool `json:"bookmark"` // nil leaves the bookmark untouched
Comment string `json:"comment"` // ASCII, at most 140 bytes; appended to any existing comment
}

// CallsModifyOutput defines the output for the modify tool
type CallsModifyOutput struct {
File string `json:"file"` // echoed from the input once the segment range has parsed
SegmentStart int `json:"segment_start"` // parsed start of the requested range
SegmentEnd int `json:"segment_end"` // parsed end of the requested range
Species string `json:"species,omitempty"` // species written to the label
CallType string `json:"calltype,omitempty"` // call type written to the label
Certainty int `json:"certainty,omitempty"` // certainty written to the label
Bookmark *bool `json:"bookmark,omitempty"` // set only when the bookmark value actually changed
Comment string `json:"comment,omitempty"` // full comment after appending, when a comment was supplied
PreviousValue string `json:"previous_value,omitempty"` // the label formatted before modification
Error string `json:"error,omitempty"` // text of the error returned alongside this output, if any
}

// validateModifyInput rejects input that lacks a file, reviewer, filter or
// segment, or whose comment is longer than 140 bytes or contains anything
// outside ASCII. The first problem found is returned; nil means valid.
func validateModifyInput(input CallsModifyInput) error {
	required := []struct {
		value   string
		message string
	}{
		{input.File, "--file is required"},
		{input.Reviewer, "--reviewer is required"},
		{input.Filter, "--filter is required"},
		{input.Segment, "--segment is required"},
	}
	for _, req := range required {
		if req.value == "" {
			return fmt.Errorf("%s", req.message)
		}
	}

	if len(input.Comment) > 140 {
		return fmt.Errorf("--comment must be 140 characters or less")
	}
	// Every byte before the first offender is ASCII, so its byte index is
	// also the index of the offending character.
	for pos := 0; pos < len(input.Comment); pos++ {
		if input.Comment[pos] > 127 {
			return fmt.Errorf("--comment must be ASCII only (non-ASCII at position %d)", pos)
		}
	}
	return nil
}

// resolveSpecies decides which species and call type to write to the label.
// An empty inputSpecies keeps the label's current pair. Otherwise the text is
// split at the first "+" into species and call type; without a "+" the whole
// text is the species and the call type is empty.
func resolveSpecies(inputSpecies string, label *utils.Label) (species, callType string) {
	switch plus := strings.Index(inputSpecies, "+"); {
	case inputSpecies == "":
		return label.Species, label.CallType
	case plus >= 0:
		return inputSpecies[:plus], inputSpecies[plus+1:]
	default:
		return inputSpecies, ""
	}
}

// hasModifyChanges reports whether applying the request would alter the label:
// a different species or call type, a different certainty, an explicit
// bookmark value that differs from the current one, or any non-empty comment
// (comments are always appended).
func hasModifyChanges(newSpecies, newCallType string, input CallsModifyInput, label *utils.Label) bool {
	classificationChanged := newSpecies != label.Species || newCallType != label.CallType
	certaintyChanged := input.Certainty != label.Certainty
	bookmarkChanged := input.Bookmark != nil && *input.Bookmark != label.Bookmark
	commentAdded := input.Comment != ""

	return classificationChanged || certaintyChanged || bookmarkChanged || commentAdded
}

// applyLabelChanges writes the requested values to the label, sets the data
// file's reviewer, and mirrors what was written into output.
//
// The combined comment (existing comment + " | " + new comment) is built and
// length-checked before anything is modified. If it exceeds 140 bytes an
// error is returned and label, dataFile and output are all left untouched, so
// a rejected request never leaves half-applied values behind.
//
// output.Bookmark is set only when the bookmark value actually changes, and
// output.Comment only when a comment was supplied.
func applyLabelChanges(label *utils.Label, dataFile *utils.DataFile, input CallsModifyInput, newSpecies, newCallType string, output *CallsModifyOutput) error {
	// Validate first: this is the only step that can fail.
	newComment := label.Comment
	if input.Comment != "" {
		if label.Comment != "" {
			newComment = label.Comment + " | " + input.Comment
		} else {
			newComment = input.Comment
		}
		if len(newComment) > 140 {
			return fmt.Errorf("combined comment exceeds 140 characters (%d)", len(newComment))
		}
	}

	dataFile.Meta.Reviewer = input.Reviewer

	label.Species = newSpecies
	label.CallType = newCallType
	output.Species = newSpecies
	output.CallType = newCallType

	label.Certainty = input.Certainty
	output.Certainty = input.Certainty

	if input.Bookmark != nil && *input.Bookmark != label.Bookmark {
		label.Bookmark = *input.Bookmark
		output.Bookmark = input.Bookmark
	}

	if input.Comment != "" {
		label.Comment = newComment
		output.Comment = newComment
	}

	return nil
}

// CallsModify modifies a label in a .data file.
//
// It validates the input, parses the "start-end" segment range, loads the
// file, locates the segment and the label for input.Filter, and — if the
// request would change anything — applies the changes and saves the file.
// A request that changes nothing is reported as an error.
//
// On every failure the returned output has its Error field set to the error
// text. Parse and save failures wrap the underlying error so callers can
// inspect it with errors.Is / errors.As.
func CallsModify(input CallsModifyInput) (CallsModifyOutput, error) {
	var output CallsModifyOutput

	if err := validateModifyInput(input); err != nil {
		output.Error = err.Error()
		return output, err
	}

	startTime, endTime, err := parseSegmentRange(input.Segment)
	if err != nil {
		output.Error = err.Error()
		return output, err
	}

	output.File = input.File
	output.SegmentStart = startTime
	output.SegmentEnd = endTime

	if _, err := os.Stat(input.File); os.IsNotExist(err) {
		output.Error = fmt.Sprintf("File not found: %s", input.File)
		return output, fmt.Errorf("%s", output.Error)
	}

	dataFile, err := utils.ParseDataFile(input.File)
	if err != nil {
		// %w keeps the cause reachable; the rendered text is the same as with %v.
		wrapped := fmt.Errorf("Failed to parse file: %w", err)
		output.Error = wrapped.Error()
		return output, wrapped
	}

	segment := findSegment(dataFile.Segments, startTime, endTime, input.Filter)
	if segment == nil {
		output.Error = fmt.Sprintf("No segment found matching time range %d-%d", startTime, endTime)
		return output, fmt.Errorf("%s", output.Error)
	}

	targetLabel := findLabelByFilter(segment, input.Filter)
	if targetLabel == nil {
		output.Error = fmt.Sprintf("No label found with filter '%s' in segment %d-%d", input.Filter, startTime, endTime)
		return output, fmt.Errorf("%s", output.Error)
	}

	// Capture the label's state before anything is changed.
	output.PreviousValue = formatLabel(targetLabel)

	newSpecies, newCallType := resolveSpecies(input.Species, targetLabel)

	if !hasModifyChanges(newSpecies, newCallType, input, targetLabel) {
		output.Error = "No changes needed: all values already match"
		return output, fmt.Errorf("%s", output.Error)
	}

	if err := applyLabelChanges(targetLabel, dataFile, input, newSpecies, newCallType, &output); err != nil {
		output.Error = err.Error()
		return output, err
	}

	if err := dataFile.Write(input.File); err != nil {
		wrapped := fmt.Errorf("Failed to save file: %w", err)
		output.Error = wrapped.Error()
		return output, wrapped
	}

	return output, nil
}

// findLabelByFilter scans the segment's labels in order and returns the first
// one whose Filter equals filter, or nil when none does.
func findLabelByFilter(segment *utils.Segment, filter string) *utils.Label {
	for i := 0; i < len(segment.Labels); i++ {
		if candidate := segment.Labels[i]; candidate.Filter == filter {
			return candidate
		}
	}
	return nil
}

// parseSegmentRange parses "12-15" format into start and end integers.
//
// Each side must be a complete base-10 integer; surrounding whitespace is
// ignored. Input with trailing garbage or a fractional part (for example
// "12abc-15" or "12.5-15") is rejected rather than silently truncated.
// The start must be strictly less than the end.
func parseSegmentRange(s string) (int, int, error) {
	parts := strings.Split(s, "-")
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("invalid segment format: %s (expected start-end, e.g., 12-15)", s)
	}

	// strconv.Atoi requires the whole string to be a number, unlike
	// fmt.Sscanf("%d"), which stops at the first non-digit and reports success.
	start, err := strconv.Atoi(strings.TrimSpace(parts[0]))
	if err != nil {
		return 0, 0, fmt.Errorf("invalid start time: %s", parts[0])
	}
	end, err := strconv.Atoi(strings.TrimSpace(parts[1]))
	if err != nil {
		return 0, 0, fmt.Errorf("invalid end time: %s", parts[1])
	}

	// Defensive: a leading "-" is already consumed by the split above.
	if start < 0 || end < 0 {
		return 0, 0, fmt.Errorf("times must be non-negative")
	}
	if start >= end {
		return 0, 0, fmt.Errorf("start time must be less than end time")
	}

	return start, end, nil
}

// findSegment returns the first segment whose rounded time range equals
// startTime-endTime and which holds a label for the given filter, or nil.
// The segment start is rounded down and the end rounded up; if both land on
// the same second the end is bumped by one so a segment spans at least one
// second. Requiring the filter lets segments that share a time range but
// belong to different filters be told apart.
func findSegment(segments []*utils.Segment, startTime, endTime int, filter string) *utils.Segment {
	for i := range segments {
		candidate := segments[i]

		lo := int(math.Floor(candidate.StartTime))
		hi := int(math.Ceil(candidate.EndTime))
		if hi == lo {
			hi = lo + 1 // minimum 1 second
		}
		if lo != startTime || hi != endTime {
			continue
		}

		for _, l := range candidate.Labels {
			if l.Filter == filter {
				return candidate
			}
		}
	}
	return nil
}

// formatLabel renders a label as "Species (NN%)", or "Species+CallType (NN%)"
// when the label has a call type, where NN is the label's certainty.
func formatLabel(label *utils.Label) string {
	name := label.Species
	if label.CallType != "" {
		name = name + "+" + label.CallType
	}
	return fmt.Sprintf("%s (%d%%)", name, label.Certainty)
}
file addition: calls_from_raven.go (----------)

[0.67281]

package calls

import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"

"skraak/utils"
)

// CallsFromRavenInput defines the input for the calls-from-raven tool.
// CallsFromRaven converts it directly to CallsFromSourceInput, so the two
// types must keep the same field layout; how each field is interpreted is
// decided by callsFromSource.
type CallsFromRavenInput struct {
Folder string `json:"folder"` // presumably a folder to scan for Raven files — confirm in callsFromSource
File string `json:"file"` // presumably a single Raven file to process — confirm in callsFromSource
Delete bool `json:"delete"` // presumably removes source files after processing — confirm in callsFromSource
ProgressHandler ProgressHandler `json:"-"` // Optional progress callback; excluded from JSON
}

// CallsFromRavenOutput defines the output for the calls-from-raven tool.
// CallsFromRaven fills every field by copying the same-named field from the
// result of callsFromSource.
type CallsFromRavenOutput struct {
Calls []ClusteredCall `json:"calls"`
TotalCalls int `json:"total_calls"`
SpeciesCount map[string]int `json:"species_count"`
DataFilesWritten int `json:"data_files_written"`
DataFilesSkipped int `json:"data_files_skipped"`
FilesProcessed int `json:"files_processed"`
FilesDeleted int `json:"files_deleted"`
Filter string `json:"filter"`
Error *string `json:"error,omitempty"` // nil when omitted from JSON
}

// ravenSource implements CallSource for Raven selection files.
// It carries no state; all behaviour lives in its methods.
type ravenSource struct{}

// Name returns the fixed source name "Raven".
func (ravenSource) Name() string {
	const sourceName = "Raven"
	return sourceName
}

// FindFiles lists the Raven selection tables directly inside folder: entries
// whose name ends in ".selections.txt", returned as paths joined onto folder.
// Subdirectories are not descended into, and a directory that happens to carry
// the suffix is skipped because it cannot be opened as a selection table.
// The error from reading the folder is returned unchanged.
func (ravenSource) FindFiles(folder string) ([]string, error) {
	entries, err := os.ReadDir(folder)
	if err != nil {
		return nil, err
	}

	var files []string
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		if strings.HasSuffix(name, ".selections.txt") {
			files = append(files, filepath.Join(folder, name))
		}
	}

	return files, nil
}

// ProcessFile handles one Raven selection file by delegating to
// processRavenFileCached and passing its four results through unchanged.
func (ravenSource) ProcessFile(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	calls, first, second, err := processRavenFileCached(ravenFile, cache)
	return calls, first, second, err
}

// CallsFromRaven runs the shared callsFromSource pipeline with the Raven
// source and repackages its result as a CallsFromRavenOutput. The error from
// the pipeline is returned as-is alongside the copied output.
func CallsFromRaven(input CallsFromRavenInput) (CallsFromRavenOutput, error) {
	result, err := callsFromSource(ravenSource{}, CallsFromSourceInput(input))

	// Field-by-field copy into the Raven-specific output type.
	return CallsFromRavenOutput{
		Calls:            result.Calls,
		TotalCalls:       result.TotalCalls,
		SpeciesCount:     result.SpeciesCount,
		DataFilesWritten: result.DataFilesWritten,
		DataFilesSkipped: result.DataFilesSkipped,
		FilesProcessed:   result.FilesProcessed,
		FilesDeleted:     result.FilesDeleted,
		Filter:           result.Filter,
		Error:            result.Error,
	}, err
}

// RavenSelection represents a single Raven selection, i.e. one parsed data
// row of a selection table.
type RavenSelection struct {
StartTime float64 // from the "Begin Time (s)" column
EndTime float64 // from the "End Time (s)" column
FreqLow float64 // from the "Low Freq (Hz)" column; zero when the column is absent
FreqHigh float64 // from the "High Freq (Hz)" column; zero when the column is absent
Species string // from the "Species" column
}

// ravenColumnIndices holds the column index positions for a Raven file.
// parseRavenHeader sets an index to -1 when the column is not in the header.
type ravenColumnIndices struct {
beginTimeIdx int // "Begin Time (s)" — required
endTimeIdx int // "End Time (s)" — required
lowFreqIdx int // "Low Freq (Hz)" — optional
highFreqIdx int // "High Freq (Hz)" — optional
speciesIdx int // "Species" — required
}

// parseRavenHeader maps the known Raven column titles to their positions in
// the header row. Columns that do not appear keep the index -1. If a title
// occurs more than once the last occurrence wins. An error is returned when
// the begin time, end time or species column is missing; the two frequency
// columns are optional.
func parseRavenHeader(header []string) (ravenColumnIndices, error) {
	idx := ravenColumnIndices{beginTimeIdx: -1, endTimeIdx: -1, lowFreqIdx: -1, highFreqIdx: -1, speciesIdx: -1}

	// Each recognised title points at the field that stores its position.
	slots := map[string]*int{
		"Begin Time (s)": &idx.beginTimeIdx,
		"End Time (s)":   &idx.endTimeIdx,
		"Low Freq (Hz)":  &idx.lowFreqIdx,
		"High Freq (Hz)": &idx.highFreqIdx,
		"Species":        &idx.speciesIdx,
	}
	for pos, title := range header {
		if slot, known := slots[title]; known {
			*slot = pos
		}
	}

	for _, required := range []int{idx.beginTimeIdx, idx.endTimeIdx, idx.speciesIdx} {
		if required == -1 {
			return idx, fmt.Errorf("missing required columns in Raven file")
		}
	}
	return idx, nil
}

// parseRavenSelections reads the remaining lines from the scanner and parses
// each tab-separated row into a RavenSelection.
//
// Empty lines are skipped, and so are rows too short to contain every
// required column (begin time, end time and species). Checking all three,
// not just species, prevents an out-of-range index in parseRavenRow when a
// time column sits to the right of the species column. A row that is long
// enough but fails to parse aborts the whole read with that error, as does a
// scanner error.
func parseRavenSelections(scanner *bufio.Scanner, idx ravenColumnIndices) ([]RavenSelection, error) {
	// Highest index among the required columns.
	lastRequired := idx.speciesIdx
	if idx.beginTimeIdx > lastRequired {
		lastRequired = idx.beginTimeIdx
	}
	if idx.endTimeIdx > lastRequired {
		lastRequired = idx.endTimeIdx
	}

	var selections []RavenSelection
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			continue
		}

		fields := strings.Split(line, "\t")
		if len(fields) <= lastRequired {
			continue
		}

		sel, err := parseRavenRow(fields, idx)
		if err != nil {
			return nil, err
		}
		selections = append(selections, sel)
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading file: %w", err)
	}
	return selections, nil
}

// parseRavenRow builds a RavenSelection from one row's fields. Begin and end
// time are always parsed; the low and high frequency are parsed only when
// their column exists and the row reaches it. Species is copied verbatim.
// The first value that is not a valid float produces an error naming it.
// The caller must ensure fields covers the begin time, end time and species
// columns.
func parseRavenRow(fields []string, idx ravenColumnIndices) (RavenSelection, error) {
	var sel RavenSelection

	// number parses the float in the given column, labelling any error.
	number := func(col int, what string) (float64, error) {
		value, err := strconv.ParseFloat(fields[col], 64)
		if err != nil {
			return 0, fmt.Errorf("failed to parse %s %q: %w", what, fields[col], err)
		}
		return value, nil
	}
	// present reports whether an optional column exists in this row.
	present := func(col int) bool { return col >= 0 && col < len(fields) }

	var err error
	if sel.StartTime, err = number(idx.beginTimeIdx, "begin time"); err != nil {
		return sel, err
	}
	if sel.EndTime, err = number(idx.endTimeIdx, "end time"); err != nil {
		return sel, err
	}
	if present(idx.lowFreqIdx) {
		if sel.FreqLow, err = number(idx.lowFreqIdx, "low freq"); err != nil {
			return sel, err
		}
	}
	if present(idx.highFreqIdx) {
		if sel.FreqHigh, err = number(idx.highFreqIdx, "high freq"); err != nil {
			return sel, err
		}
	}

	sel.Species = fields[idx.speciesIdx]
	return sel, nil
}

// deriveWAVBaseName turns a Raven selection file path into the base name of
// its recording: the directory and the ".selections.txt" suffix are dropped,
// and if ".Table." occurs after the first character, everything from it
// onwards is dropped too.
func deriveWAVBaseName(ravenFile string) string {
	stem := strings.TrimSuffix(filepath.Base(ravenFile), ".selections.txt")
	if cut := strings.Index(stem, ".Table."); cut > 0 {
		return stem[:cut]
	}
	return stem
}

// processRavenFileCached reads one Raven selection table, locates the matching
// WAV recording (through cache when it is non-nil) and writes a ".data" file
// next to that recording with one segment per selection.
//
// The first boolean is true only on the path that wrote the .data file. The
// second is true on the paths that return without writing and without error:
// no selections, no matching WAV, or a WAV header that could not be read.
// An error is returned when the table cannot be opened, is empty, has a bad
// header or row, or when writing the .data file fails.
func processRavenFileCached(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	in, err := os.Open(ravenFile)
	if err != nil {
		return nil, false, false, fmt.Errorf("failed to open file: %w", err)
	}
	defer func() { _ = in.Close() }()

	lines := bufio.NewScanner(in)
	if !lines.Scan() {
		return nil, false, false, fmt.Errorf("empty file")
	}

	columns, err := parseRavenHeader(strings.Split(lines.Text(), "\t"))
	if err != nil {
		return nil, false, false, err
	}

	selections, err := parseRavenSelections(lines, columns)
	if err != nil {
		return nil, false, false, err
	}
	if len(selections) == 0 {
		return nil, false, true, nil
	}

	// Locate the recording this table belongs to.
	wavPath := resolveWAVPath(ravenFile, cache)
	if wavPath == "" {
		return nil, false, true, nil
	}

	// An unreadable WAV header is treated like a missing recording.
	sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
	if err != nil {
		return nil, false, true, nil
	}

	reviewer := "None"
	meta := AviaNZMeta{Operator: "Raven", Duration: duration}
	meta.Reviewer = &reviewer

	segments := buildRavenSegments(selections, sampleRate)
	if err := writeDotDataFileSafe(wavPath+".data", segments, "Raven", meta); err != nil {
		return nil, false, false, err
	}

	// One call per selection, each counted as a single segment.
	calls := make([]ClusteredCall, len(selections))
	for i, sel := range selections {
		calls[i] = ClusteredCall{
			File:      wavPath,
			StartTime: sel.StartTime,
			EndTime:   sel.EndTime,
			EbirdCode: sel.Species,
			Segments:  1,
		}
	}

	return calls, true, false, nil
}

// resolveWAVPath returns the path of the WAV file that belongs to ravenFile,
// or "" when none is found. With a non-nil cache the lookup goes through
// cache.FindWAV; otherwise the Raven file's own directory is scanned.
func resolveWAVPath(ravenFile string, cache *DirCache) string {
	stem := deriveWAVBaseName(ravenFile)
	if cache == nil {
		return findWAVFile(filepath.Dir(ravenFile), stem)
	}
	return cache.FindWAV(stem)
}

// buildRavenSegments turns Raven selections into AviaNZ segments, one segment
// per selection, each carrying a single label with filter "Raven".
func buildRavenSegments(selections []RavenSelection, sampleRate int) []AviaNZSegment {
	var out []AviaNZSegment

	for _, sel := range selections {
		// Keep Raven's frequency band; when both bounds are zero, fall back
		// to 0..sampleRate.
		lo, hi := sel.FreqLow, sel.FreqHigh
		if lo == 0 && hi == 0 {
			hi = float64(sampleRate)
		}

		out = append(out, AviaNZSegment{
			sel.StartTime,
			sel.EndTime,
			lo,
			hi,
			[]AviaNZLabel{
				{
					Species:   sel.Species,
					Certainty: 70, // Raven has no confidence metric; fixed default
					Filter:    "Raven",
				},
			},
		})
	}

	return out
}
file addition: calls_from_preds_test.go (----------)

[0.67281]

package calls

import (
"os"
"path/filepath"
"testing"

"skraak/utils"
)

// TestCallsFromPreds_EmptyFilterError checks that CallsFromPreds fails when no
// filter is given and the CSV is named "preds.csv".
func TestCallsFromPreds_EmptyFilterError(t *testing.T) {
	dir := t.TempDir()

	// Predictions CSV with a single positive kiwi row.
	csvPath := filepath.Join(dir, "preds.csv")
	rows := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(rows), 0644); err != nil {
		t.Fatal(err)
	}

	// Recording referenced by the CSV.
	createMinimalWAV(t, filepath.Join(dir, "test.wav"), 44100, 10.0)

	output, err := CallsFromPreds(CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "",
		WriteDotData:    true,
		ProgressHandler: nil,
	})

	// Both the returned error and the output's Error field must be set.
	if err == nil {
		t.Error("expected error for empty filter, got nil")
	}
	if output.Error == nil || *output.Error == "" {
		t.Error("expected error message in output, got empty")
	}
}

// TestCallsFromPreds_NewDataFile checks that a fresh .data file is written and
// that its single label carries the filter parsed from the CSV filename.
func TestCallsFromPreds_NewDataFile(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_test-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Test with filter parsed from filename
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename
		WriteDotData:    true,
		ProgressHandler: nil,
	}

	output, err := CallsFromPreds(input)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "test-filter" {
		t.Errorf("expected filter 'test-filter', got '%s'", output.Filter)
	}

	// Verify .data file was created
	dataPath := wavPath + ".data"
	if _, err := os.Stat(dataPath); err != nil {
		t.Fatalf("expected .data file to be created: %v", err)
	}

	// Verify content. Length checks are fatal: the assertions below index
	// into the slices and would panic instead of failing cleanly.
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df.Segments))
	}
	if len(df.Segments[0].Labels) != 1 {
		t.Fatalf("expected 1 label, got %d", len(df.Segments[0].Labels))
	}
	if df.Segments[0].Labels[0].Filter != "test-filter" {
		t.Errorf("expected filter 'test-filter', got '%s'", df.Segments[0].Labels[0].Filter)
	}
}

// TestCallsFromPreds_ExistingDataFileSameFilter checks that CallsFromPreds
// refuses to touch a .data file that already holds labels for the same filter.
func TestCallsFromPreds_ExistingDataFileSameFilter(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_existing-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Create existing .data file with same filter
	dataPath := wavPath + ".data"
	existingData := `[
{"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},
[5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "existing-filter"}]]
]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}

	// Test with same filter (should error)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename -> "existing-filter"
		WriteDotData:    true,
		ProgressHandler: nil,
	}

	output, err := CallsFromPreds(input)

	// Should return error
	if err == nil {
		t.Error("expected error for same filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}

	// Verify original .data file is unchanged. Length checks are fatal so the
	// indexing below cannot panic.
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 1 {
		t.Fatalf("expected original 1 segment, got %d", len(df.Segments))
	}
	if len(df.Segments[0].Labels) == 0 {
		t.Fatal("expected original segment to keep its label")
	}
	if df.Segments[0].Labels[0].Species != "morepork" {
		t.Errorf("expected original species 'morepork', got '%s'", df.Segments[0].Labels[0].Species)
	}
}

// TestCallsFromPreds_ExistingDataFileDifferentFilter checks that new segments
// are merged into an existing .data file whose labels use another filter.
func TestCallsFromPreds_ExistingDataFileDifferentFilter(t *testing.T) {
	// Create a temp CSV file
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predsST_new-filter_2025-01-01.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Create existing .data file with different filter
	dataPath := wavPath + ".data"
	existingData := `[
{"Operator": "Manual", "Reviewer": "David", "Duration": 10.0},
[5.0, 8.0, 0, 44100, [{"species": "morepork", "certainty": 90, "filter": "old-filter"}]]
]`
	if err := os.WriteFile(dataPath, []byte(existingData), 0644); err != nil {
		t.Fatal(err)
	}

	// Test with different filter (should merge)
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "", // Will parse from filename -> "new-filter"
		WriteDotData:    true,
		ProgressHandler: nil,
	}

	output, err := CallsFromPreds(input)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}

	// Verify .data file has merged content. The length check is fatal because
	// the ordering assertion below indexes Segments[1].
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if len(df.Segments) != 2 {
		t.Fatalf("expected 2 segments after merge, got %d", len(df.Segments))
	}

	// Check segments are sorted by start time
	if df.Segments[0].StartTime > df.Segments[1].StartTime {
		t.Error("expected segments to be sorted by start time")
	}

	// Check both filters are present
	filters := make(map[string]bool)
	for _, seg := range df.Segments {
		for _, label := range seg.Labels {
			filters[label.Filter] = true
		}
	}
	if !filters["old-filter"] {
		t.Error("expected 'old-filter' to be present")
	}
	if !filters["new-filter"] {
		t.Error("expected 'new-filter' to be present")
	}
}

// TestCallsFromPreds_ExistingDataFileParseError checks that an unparsable
// existing .data file makes CallsFromPreds fail and is left byte-for-byte intact.
func TestCallsFromPreds_ExistingDataFileParseError(t *testing.T) {
	dir := t.TempDir()

	// Predictions CSV with a parsable filter in its name.
	csvPath := filepath.Join(dir, "predsST_test-filter_2025-01-01.csv")
	rows := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(rows), 0644); err != nil {
		t.Fatal(err)
	}

	// Recording plus a pre-existing .data file that is not valid JSON.
	wavPath := filepath.Join(dir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	const garbage = `this is not valid json`
	dataPath := wavPath + ".data"
	if err := os.WriteFile(dataPath, []byte(garbage), 0644); err != nil {
		t.Fatal(err)
	}

	output, err := CallsFromPreds(CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "",
		WriteDotData:    true,
		ProgressHandler: nil,
	})

	// The parse failure must surface as an error.
	if err == nil {
		t.Error("expected error for corrupted .data file, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}

	// The corrupted file must not have been overwritten.
	onDisk, err := os.ReadFile(dataPath)
	if err != nil {
		t.Fatal(err)
	}
	if string(onDisk) != garbage {
		t.Error("expected corrupted file to remain unchanged")
	}
}

// TestCallsFromPreds_ExplicitFilter checks that an explicitly supplied filter
// is reported in the output and written into the .data file labels.
func TestCallsFromPreds_ExplicitFilter(t *testing.T) {
	// Create a temp CSV file with non-standard name
	tmpDir := t.TempDir()
	csvPath := filepath.Join(tmpDir, "predictions.csv")
	csvContent := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create a dummy WAV file
	wavPath := filepath.Join(tmpDir, "test.wav")
	createMinimalWAV(t, wavPath, 44100, 10.0)

	// Test with explicit filter
	input := CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "my-custom-filter",
		WriteDotData:    true,
		ProgressHandler: nil,
	}

	output, err := CallsFromPreds(input)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.Filter != "my-custom-filter" {
		t.Errorf("expected filter 'my-custom-filter', got '%s'", output.Filter)
	}

	// Verify .data file uses explicit filter
	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	// Guard the indexing below so a missing segment or label fails the test
	// instead of panicking.
	if len(df.Segments) == 0 || len(df.Segments[0].Labels) == 0 {
		t.Fatalf("expected at least one labelled segment, got %d segments", len(df.Segments))
	}
	if df.Segments[0].Labels[0].Filter != "my-custom-filter" {
		t.Errorf("expected filter 'my-custom-filter' in .data file, got '%s'", df.Segments[0].Labels[0].Filter)
	}
}

// TestCallsFromPreds_NonParsableFilenameNoFilter checks that CallsFromPreds
// fails when no filter is given and the CSV is named "random_name.csv".
func TestCallsFromPreds_NonParsableFilenameNoFilter(t *testing.T) {
	dir := t.TempDir()

	// CSV whose name does not follow the prefix_filter_date pattern.
	csvPath := filepath.Join(dir, "random_name.csv")
	rows := "file,start_time,end_time,kiwi\n./test.wav,0.0,3.0,1\n"
	if err := os.WriteFile(csvPath, []byte(rows), 0644); err != nil {
		t.Fatal(err)
	}

	// Recording referenced by the CSV.
	createMinimalWAV(t, filepath.Join(dir, "test.wav"), 44100, 10.0)

	output, err := CallsFromPreds(CallsFromPredsInput{
		CSVPath:         csvPath,
		Filter:          "",
		WriteDotData:    true,
		ProgressHandler: nil,
	})

	// No explicit filter and nothing parsable: expect a failure.
	if err == nil {
		t.Error("expected error for unparsable filename with no filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
}

// createMinimalWAV writes a 16-bit mono PCM WAV file to path: a 44-byte
// RIFF/WAVE header followed by int(sampleRate*duration) samples of silence.
// Any I/O failure aborts the calling test.
func createMinimalWAV(t *testing.T, path string, sampleRate int, duration float64) {
	t.Helper()

	const headerLen = 44
	pcmBytes := int(float64(sampleRate)*duration) * 2 // 2 bytes per sample

	// Little-endian field writers.
	le16 := func(dst []byte, v uint16) {
		dst[0], dst[1] = byte(v), byte(v>>8)
	}
	le32 := func(dst []byte, v uint32) {
		le16(dst, uint16(v))
		le16(dst[2:], uint16(v>>16))
	}

	// Header and silent payload share one zeroed buffer.
	wav := make([]byte, headerLen+pcmBytes)

	// RIFF header
	copy(wav[0:4], "RIFF")
	le32(wav[4:8], uint32(36+pcmBytes))
	copy(wav[8:12], "WAVE")

	// fmt chunk
	copy(wav[12:16], "fmt ")
	le32(wav[16:20], 16)                   // chunk size
	le16(wav[20:22], 1)                    // audio format: PCM
	le16(wav[22:24], 1)                    // channels
	le32(wav[24:28], uint32(sampleRate))   // sample rate
	le32(wav[28:32], uint32(sampleRate*2)) // byte rate
	le16(wav[32:34], 2)                    // block align
	le16(wav[34:36], 16)                   // bits per sample

	// data chunk
	copy(wav[36:40], "data")
	le32(wav[40:44], uint32(pcmBytes))

	if err := os.WriteFile(path, wav, 0666); err != nil {
		t.Fatal(err)
	}
}
file addition: calls_from_preds.go (----------)

[0.67281]

package calls

import (
"encoding/csv"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"

"skraak/utils"
)

// Default tuning values for clustering detections and writing .data files.
const (
// CLUSTER_GAP_MULTIPLIER is the default gap multiplier: the gap threshold is
// CLUSTER_GAP_MULTIPLIER * clip_duration. The original note mentions 3 as the
// value for kiwi.
CLUSTER_GAP_MULTIPLIER = 2
// MIN_DETECTIONS_PER_CLUSTER is the default cluster-size cut-off.
// 1 = filter out single detections (used for kiwi, they have long calls 30s),
// 0 = let single detections pass through.
MIN_DETECTIONS_PER_CLUSTER = 0
// DEFAULT_CERTAINTY is the certainty written on each generated .data label.
DEFAULT_CERTAINTY = 70
// DOT_DATA_WORKERS is the number of parallel workers for .data file writing.
DOT_DATA_WORKERS = 8
)

// ClusteredCall represents a clustered bird call detection.
// When produced by clusterDetections, StartTime is the first detection's start
// time, EndTime is the last detection's start time plus the clip duration, and
// Segments is the number of detections in the cluster.
type ClusteredCall struct {
// File is the audio file name as it appears in the source data.
File string `json:"file"`
// StartTime and EndTime bound the call within the file.
StartTime float64 `json:"start_time"`
EndTime float64 `json:"end_time"`
// EbirdCode is the species column name the detections came from.
EbirdCode string `json:"ebird_code"`
// Segments counts the detections merged into this call.
Segments int `json:"segments"`
}

// CallsFromPredsInput defines the input for the calls-from-preds tool.
type CallsFromPredsInput struct {
// CSVPath is the predictions CSV to read; .data files are written relative
// to its directory.
CSVPath string `json:"csv_path"`
// Filter is the label filter name. When empty, CallsFromPreds tries to parse
// it from the CSV filename.
Filter string `json:"filter"`
// WriteDotData enables writing .data files for the clustered calls.
WriteDotData bool `json:"write_dot_data"`
// GapMultiplier overrides CLUSTER_GAP_MULTIPLIER when greater than zero.
GapMultiplier int `json:"gap_multiplier"`
// MinDetections is used whenever it is >= 0, which includes the zero value.
// Clusters with at most this many detections are dropped.
MinDetections int `json:"min_detections"`
ProgressHandler ProgressHandler `json:"-"` // Optional progress callback (not serialized)
}

// ProgressHandler is a callback function for reporting progress during long operations.
//   - processed: number of items processed so far
//   - total: total number of items to process
//   - message: optional status message (may be empty)
type ProgressHandler func(processed, total int, message string)

// CallsFromPredsOutput defines the output for the calls-from-preds tool.
// On failure CallsFromPreds returns a partially filled value with Error set.
type CallsFromPredsOutput struct {
// Calls holds the clustered calls; TotalCalls is len(Calls).
Calls []ClusteredCall `json:"calls"`
TotalCalls int `json:"total_calls"`
// ClipDuration is end_time - start_time of the first CSV data row.
ClipDuration float64 `json:"clip_duration"`
// GapThreshold is the gap multiplier times ClipDuration.
GapThreshold float64 `json:"gap_threshold"`
// SpeciesCount maps each species code to its number of calls.
SpeciesCount map[string]int `json:"species_count"`
// DataFilesWritten and DataFilesSkipped count .data file outcomes.
DataFilesWritten int `json:"data_files_written"`
DataFilesSkipped int `json:"data_files_skipped"`
// Filter is the filter name that was actually used.
Filter string `json:"filter"`
// Error carries the failure message, if any.
Error *string `json:"error,omitempty"`
}

// AviaNZ .data file types

// predFileSpeciesKey groups detections by file and ebird code.
// It is used as the key of the detections map built from the predictions CSV.
type predFileSpeciesKey struct {
File string
EbirdCode string
}

// CallsFromPreds reads a predictions CSV and clusters detections into
// continuous bird calls, optionally writing AviaNZ .data files next to the
// recordings in the CSV's directory.
//
// The filter comes from input.Filter or, when that is empty, from the CSV
// filename; if neither yields one, an error is returned. On any failure the
// returned output is partially filled and has Error set to the same message as
// the returned error. A .data write failure is wrapped with %w, so callers can
// inspect the cause with errors.Is / errors.As.
func CallsFromPreds(input CallsFromPredsInput) (CallsFromPredsOutput, error) {
	var output CallsFromPredsOutput

	// Determine filter: use provided filter, or parse from CSV filename
	filter := input.Filter
	if filter == "" {
		filter = ParseFilterFromFilename(input.CSVPath)
	}
	if filter == "" {
		errMsg := "Filter must be specified via --filter flag or parsable from CSV filename"
		output.Error = &errMsg
		return output, fmt.Errorf("%s", errMsg)
	}
	output.Filter = filter

	_, detections, clipDuration, err := readPredCSV(input.CSVPath)
	if err != nil {
		errMsg := err.Error()
		output.Error = &errMsg
		return output, err
	}
	output.ClipDuration = clipDuration

	gapMultiplier := CLUSTER_GAP_MULTIPLIER
	if input.GapMultiplier > 0 {
		gapMultiplier = input.GapMultiplier
	}
	// NOTE(review): the zero value of MinDetections satisfies >= 0, so the
	// package default only applies when a negative value is passed.
	minDetections := MIN_DETECTIONS_PER_CLUSTER
	if input.MinDetections >= 0 {
		minDetections = input.MinDetections
	}
	gapThreshold := float64(gapMultiplier) * clipDuration
	output.GapThreshold = gapThreshold

	allCalls, speciesCount := clusterDetections(detections, clipDuration, gapThreshold, minDetections)

	output.Calls = allCalls
	output.TotalCalls = len(allCalls)
	output.SpeciesCount = speciesCount

	if input.WriteDotData {
		dataFilesWritten, dataFilesSkipped, err := writeDotFiles(input.CSVPath, filter, allCalls, input.ProgressHandler)
		if err != nil {
			// Same message text as before, but keep the cause in the chain.
			wrapped := fmt.Errorf("Error writing .data files: %w", err)
			errMsg := wrapped.Error()
			output.Error = &errMsg
			return output, wrapped
		}
		output.DataFilesWritten = dataFilesWritten
		output.DataFilesSkipped = dataFilesSkipped
	}

	return output, nil
}

// readPredCSV loads a predictions CSV from csvPath. It returns the column
// layout found in the header, the detections grouped by file and species, and
// the clip duration. On any failure the results are zero values plus the error.
func readPredCSV(csvPath string) (predCSVColumns, map[predFileSpeciesKey][]float64, float64, error) {
	var none predCSVColumns

	f, err := os.Open(csvPath)
	if err != nil {
		return none, nil, 0, fmt.Errorf("failed to open CSV file: %w", err)
	}
	defer func() { _ = f.Close() }()

	rd := csv.NewReader(f)
	rd.ReuseRecord = true

	// The header row decides which column is which.
	headerRow, err := rd.Read()
	if err != nil {
		return none, nil, 0, fmt.Errorf("failed to read CSV header: %w", err)
	}
	layout, err := findPredCSVColumns(headerRow)
	if err != nil {
		return none, nil, 0, err
	}

	found, clipLen, err := readPredCSVRows(rd, layout)
	if err != nil {
		return none, nil, 0, err
	}
	return layout, found, clipLen, nil
}

// predCSVColumns holds the column indices for a predictions CSV.
type predCSVColumns struct {
// fileIdx, startTimeIdx and endTimeIdx are the positions of the "file",
// "start_time" and "end_time" header columns (-1 until found).
fileIdx int
startTimeIdx int
endTimeIdx int
// ebirdCodes and ebirdIdx are parallel slices: ebirdCodes[i] is the header
// name of the species column at position ebirdIdx[i].
ebirdCodes []string
ebirdIdx []int
}

// findPredCSVColumns maps a CSV header row to column positions. "file",
// "start_time" and "end_time" are required. The columns "NotKiwi" and "0.0" are
// ignored. Every other column is recorded as a species (ebird code) column,
// and at least one such column must exist.
func findPredCSVColumns(header []string) (predCSVColumns, error) {
	cols := predCSVColumns{fileIdx: -1, startTimeIdx: -1, endTimeIdx: -1}

	for pos, name := range header {
		switch name {
		case "file":
			cols.fileIdx = pos
		case "start_time":
			cols.startTimeIdx = pos
		case "end_time":
			cols.endTimeIdx = pos
		case "NotKiwi", "0.0":
			// Deliberately ignored: not species columns.
		default:
			cols.ebirdCodes = append(cols.ebirdCodes, name)
			cols.ebirdIdx = append(cols.ebirdIdx, pos)
		}
	}

	if cols.fileIdx < 0 || cols.startTimeIdx < 0 || cols.endTimeIdx < 0 {
		return cols, fmt.Errorf("CSV must have 'file', 'start_time', and 'end_time' columns")
	}
	if len(cols.ebirdCodes) == 0 {
		return cols, fmt.Errorf("CSV must have at least one ebird code column")
	}
	return cols, nil
}

// readPredCSVRows reads all CSV data rows and returns detections grouped by
// file+species, plus the clip duration (end_time - start_time of the first
// data row). A CSV with no data rows yields an empty map and a duration of 0.
//
// Unparsable start_time values, and an unparsable end_time on the first row,
// are reported as errors rather than silently read as 0, which would distort
// the clip duration and the clustering.
func readPredCSVRows(reader *csv.Reader, cols predCSVColumns) (map[predFileSpeciesKey][]float64, float64, error) {
	detections := make(map[predFileSpeciesKey][]float64)
	clipDuration := 0.0

	for first := true; ; first = false {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			if first {
				return nil, 0, fmt.Errorf("failed to read first CSV row: %w", err)
			}
			return nil, 0, fmt.Errorf("failed to read CSV row: %w", err)
		}

		startTime, err := strconv.ParseFloat(record[cols.startTimeIdx], 64)
		if err != nil {
			return nil, 0, fmt.Errorf("failed to parse start_time %q: %w", record[cols.startTimeIdx], err)
		}

		// Only the first data row determines the clip duration.
		if first {
			endTime, err := strconv.ParseFloat(record[cols.endTimeIdx], 64)
			if err != nil {
				return nil, 0, fmt.Errorf("failed to parse end_time %q: %w", record[cols.endTimeIdx], err)
			}
			clipDuration = endTime - startTime
		}

		addDetectionsFromRow(record, cols, startTime, detections)
	}

	return detections, clipDuration, nil
}

// addDetectionsFromRow records startTime under (file, species) for every
// species column of the row whose cell is exactly "1".
func addDetectionsFromRow(record []string, cols predCSVColumns, startTime float64, detections map[predFileSpeciesKey][]float64) {
	file := record[cols.fileIdx]
	for n, column := range cols.ebirdIdx {
		if record[column] != "1" {
			continue
		}
		k := predFileSpeciesKey{File: file, EbirdCode: cols.ebirdCodes[n]}
		detections[k] = append(detections[k], startTime)
	}
}

// clusterDetections groups each (file, species) list of detection start times
// into clusters and returns one ClusteredCall per kept cluster, together with
// the number of calls per species.
//
// The start-time slices in detections are sorted in place. Clusters with
// len(cluster) <= minDetections are dropped. A call spans from the first start
// time in its cluster to the last start time plus clipDuration.
//
// The result is ordered by file, then start time, then species code. The
// species tie-breaker keeps the output deterministic even though the
// detections map is iterated in random order.
func clusterDetections(detections map[predFileSpeciesKey][]float64, clipDuration, gapThreshold float64, minDetections int) ([]ClusteredCall, map[string]int) {
	var allCalls []ClusteredCall
	speciesCount := make(map[string]int)

	for key, startTimes := range detections {
		sort.Float64s(startTimes)

		for _, cluster := range clusterStartTimes(startTimes, gapThreshold) {
			if len(cluster) <= minDetections {
				continue
			}

			allCalls = append(allCalls, ClusteredCall{
				File:      key.File,
				StartTime: cluster[0],
				EndTime:   cluster[len(cluster)-1] + clipDuration,
				EbirdCode: key.EbirdCode,
				Segments:  len(cluster),
			})
			speciesCount[key.EbirdCode]++
		}
	}

	sort.Slice(allCalls, func(i, j int) bool {
		a, b := allCalls[i], allCalls[j]
		if a.File != b.File {
			return a.File < b.File
		}
		if a.StartTime != b.StartTime {
			return a.StartTime < b.StartTime
		}
		return a.EbirdCode < b.EbirdCode
	})

	return allCalls, speciesCount
}

// DirCache caches directory entries for fast WAV file lookup.
// Scans the directory once and builds a map from lowercased basename to full filename.
// Safe for concurrent read-only use after construction.
// Keys are basenames without their extension, so two files that differ only in
// extension (or only in letter case) share a key and the later entry wins.
type DirCache struct {
dir string // directory that was scanned; joined with the stored filename on lookup
wavMap map[string]string // lowercase basename -> filename with original case (e.g. "20230610_150000" -> "20230610_150000.WAV")
dirMap map[string]string // lowercase basename -> filename for any file (used by from-raven for .selections.txt etc.)
}

// NewDirCache scans dir once and indexes its non-directory entries by
// lowercased basename (extension removed). If the directory cannot be read,
// an empty cache is returned, so every lookup on it misses.
func NewDirCache(dir string) *DirCache {
	cache := &DirCache{dir: dir}

	entries, err := os.ReadDir(dir)
	if err != nil {
		cache.wavMap = make(map[string]string)
		cache.dirMap = make(map[string]string)
		return cache
	}

	cache.wavMap = make(map[string]string, len(entries))
	cache.dirMap = make(map[string]string, len(entries))
	for _, e := range entries {
		if e.IsDir() {
			continue
		}
		name := e.Name()
		ext := filepath.Ext(name)
		key := strings.ToLower(strings.TrimSuffix(name, ext))

		cache.dirMap[key] = name
		if strings.EqualFold(ext, ".wav") {
			cache.wavMap[key] = name
		}
	}
	return cache
}

// FindWAV resolves baseName (no extension, any letter case) to the path of the
// cached WAV file, using the on-disk spelling of the filename. It returns ""
// when no WAV with that basename was seen.
func (dc *DirCache) FindWAV(baseName string) string {
	name, found := dc.wavMap[strings.ToLower(baseName)]
	if !found {
		return ""
	}
	return filepath.Join(dc.dir, name)
}

// FindFile resolves baseName (no extension, any letter case) to the path of a
// cached file of any type, using the on-disk spelling of the filename. It
// returns "" when no file with that basename was seen.
func (dc *DirCache) FindFile(baseName string) string {
	name, found := dc.dirMap[strings.ToLower(baseName)]
	if !found {
		return ""
	}
	return filepath.Join(dc.dir, name)
}

// findWAVFile finds a WAV file in the directory with case-insensitive matching.
// baseName is the filename without extension (e.g., "20230610_150000").
// Returns the full path with correct case, or empty string if not found
// (including when the directory cannot be read).
//
// Both the basename and the ".wav" extension are compared without regard to
// letter case, matching DirCache.FindWAV. An entry whose basename matches
// exactly is preferred; otherwise the first case-insensitive match is used.
//
// Deprecated: Use DirCache.FindWAV for batch operations to avoid repeated directory scans.
func findWAVFile(dir, baseName string) string {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return ""
	}

	fallback := ""
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		ext := filepath.Ext(name)
		if !strings.EqualFold(ext, ".wav") {
			continue
		}
		stem := strings.TrimSuffix(name, ext)
		if stem == baseName {
			// Exact basename match wins immediately (the original behaviour).
			return filepath.Join(dir, name)
		}
		if fallback == "" && strings.EqualFold(stem, baseName) {
			fallback = filepath.Join(dir, name)
		}
	}
	return fallback
}

// writeDotFiles writes one AviaNZ .data file per audio file that has calls.
// Recordings are looked up in the directory that contains the CSV, and calls
// are grouped by the base name of their File field. Fewer than 10 files are
// handled sequentially; larger batches go through the parallel worker pool.
// It returns the number of files written, the number skipped, and any error.
func writeDotFiles(csvPath, filter string, calls []ClusteredCall, progress ProgressHandler) (int, int, error) {
	dir := filepath.Dir(csvPath)

	// Bucket the calls per recording (directory part of File is ignored).
	grouped := make(map[string][]ClusteredCall)
	for i := range calls {
		key := filepath.Base(calls[i].File)
		grouped[key] = append(grouped[key], calls[i])
	}

	if progress != nil {
		progress(0, len(grouped), "Processing WAV files")
	}

	// Large batches go through the worker pool; small ones avoid that overhead.
	if len(grouped) >= 10 {
		return writeDotFilesParallel(dir, filter, grouped, progress)
	}
	return writeDotFilesSequential(dir, filter, grouped, progress)
}

// dotDataJob represents a single file to process: the audio filename taken
// from the CSV and the calls that belong to it.
type dotDataJob struct {
filename string
fileCalls []ClusteredCall
}

// dotDataResult represents the result of processing a single file.
// written is true only when the .data file was written; a result with
// written == false and err == nil is a skipped file.
type dotDataResult struct {
filename string
written bool
err error
}

// writeDotFilesSequential writes the .data files one recording at a time.
// A recording is skipped when its WAV file is not found in csvDir or when its
// header cannot be parsed. The first write failure stops processing and is
// returned together with the counts so far. Progress is reported after every
// file that is written or skipped.
func writeDotFilesSequential(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
	written, skipped, done := 0, 0, 0
	total := len(callsByFile)

	// advance counts one finished file and notifies the progress callback.
	advance := func() {
		done++
		if progress != nil {
			progress(done, total, "")
		}
	}

	for filename, fileCalls := range callsByFile {
		// Resolve the recording on disk from the CSV's filename.
		stem := strings.TrimSuffix(filename, filepath.Ext(filename))
		wavPath := findWAVFile(csvDir, stem)
		if wavPath == "" {
			skipped++
			advance()
			continue
		}

		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
		if err != nil {
			skipped++
			advance()
			continue
		}

		dataPath := wavPath + ".data"
		meta, segments := buildAviaNZMetaAndSegments(fileCalls, filter, duration, sampleRate)
		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
			return written, skipped, fmt.Errorf("failed to write %s: %w", dataPath, err)
		}

		written++
		advance()
	}

	return written, skipped, nil
}

// writeDotFilesParallel writes the .data files with DOT_DATA_WORKERS
// goroutines. All jobs are queued up front, and the result channel is closed
// once every worker has returned. The first error seen is returned after all
// results are collected. Every result that was not written, failures included,
// is counted as skipped.
func writeDotFilesParallel(csvDir, filter string, callsByFile map[string][]ClusteredCall, progress ProgressHandler) (int, int, error) {
	total := len(callsByFile)
	var processed atomic.Int32

	// Both channels can hold every job, so neither side blocks on capacity.
	jobs := make(chan dotDataJob, total)
	results := make(chan dotDataResult, total)

	var workers sync.WaitGroup
	for w := 0; w < DOT_DATA_WORKERS; w++ {
		workers.Add(1)
		go dotDataWorker(csvDir, filter, jobs, results, &workers)
	}

	for name, group := range callsByFile {
		jobs <- dotDataJob{filename: name, fileCalls: group}
	}
	close(jobs)

	// Close results once all workers are done so the loop below terminates.
	go func() {
		workers.Wait()
		close(results)
	}()

	var (
		written, skipped int
		firstErr         error
	)
	for res := range results {
		if firstErr == nil && res.err != nil {
			firstErr = res.err
		}
		if res.written {
			written++
		} else {
			skipped++
		}

		if progress != nil {
			progress(int(processed.Add(1)), total, "")
		}
	}

	return written, skipped, firstErr
}

// dotDataWorker drains jobs, writing one .data file per job, and sends exactly
// one result per job. It calls wg.Done when the jobs channel is closed and
// empty. A missing WAV file or an unreadable WAV header yields a non-written
// result without an error.
func dotDataWorker(csvDir, filter string, jobs <-chan dotDataJob, results chan<- dotDataResult, wg *sync.WaitGroup) {
	defer wg.Done()

	// handle turns a single job into its result.
	handle := func(job dotDataJob) dotDataResult {
		res := dotDataResult{filename: job.filename}

		// Resolve the recording on disk from the CSV's filename.
		stem := strings.TrimSuffix(job.filename, filepath.Ext(job.filename))
		wavPath := findWAVFile(csvDir, stem)
		if wavPath == "" {
			return res
		}

		sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
		if err != nil {
			return res
		}

		dataPath := wavPath + ".data"
		meta, segments := buildAviaNZMetaAndSegments(job.fileCalls, filter, duration, sampleRate)
		if err := writeDotDataFileSafe(dataPath, segments, filter, meta); err != nil {
			res.err = fmt.Errorf("failed to write %s: %w", dataPath, err)
			return res
		}

		res.written = true
		return res
	}

	for job := range jobs {
		results <- handle(job)
	}
}

// buildAviaNZMetaAndSegments produces the metadata header and one segment per
// call for a .data file. The metadata has operator "Auto" and reviewer "None".
// Each segment is [start, end, freq_low, freq_high, labels] with freq_low 0,
// freq_high set to sampleRate, and a single label at DEFAULT_CERTAINTY for the
// given filter.
func buildAviaNZMetaAndSegments(calls []ClusteredCall, filter string, duration float64, sampleRate int) (AviaNZMeta, []AviaNZSegment) {
	noReviewer := "None"

	var segments []AviaNZSegment
	for i := range calls {
		c := calls[i]
		segments = append(segments, AviaNZSegment{
			c.StartTime,
			c.EndTime,
			0,          // freq_low
			sampleRate, // freq_high
			[]AviaNZLabel{
				{
					Species:   c.EbirdCode,
					Certainty: DEFAULT_CERTAINTY,
					Filter:    filter,
				},
			},
		})
	}

	return AviaNZMeta{Operator: "Auto", Reviewer: &noReviewer, Duration: duration}, segments
}

// writeAviaNZDataFile writes a new .data file to disk as compact JSON followed
// by a newline. It does not check for existing files: os.Create truncates
// whatever is at path.
//
// An error is returned if the file cannot be created, the data cannot be
// encoded, or the file cannot be closed. The close error is reported because a
// failed close after writing can mean the data never reached the disk.
func writeAviaNZDataFile(path string, data []any) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}
	defer func() {
		// Keep the earlier error if there is one; otherwise surface Close's.
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to close file: %w", closeErr)
		}
	}()

	encoder := json.NewEncoder(file)
	encoder.SetIndent("", "") // No indentation for compact output

	if encodeErr := encoder.Encode(data); encodeErr != nil {
		return fmt.Errorf("failed to encode JSON: %w", encodeErr)
	}

	return nil
}

// writeDotDataFileSafe safely writes or merges .data files
//   - If file doesn't exist: write new file built from meta and newSegments
//   - If file exists with same filter: return error (refuse to clobber)
//   - If file exists with different filter: merge segments, sort by start
//     time and write (the existing file's metadata is kept; meta is unused)
//   - If file exists but can't be parsed: return error (refuse to clobber)
//   - If the file's existence can't be determined (os.Stat fails for any
//     reason other than "does not exist"): return error (refuse to clobber)
func writeDotDataFileSafe(path string, newSegments []AviaNZSegment, filter string, meta AviaNZMeta) error {
	_, statErr := os.Stat(path)
	switch {
	case statErr == nil:
		// File exists - fall through to the merge logic below.
	case os.IsNotExist(statErr):
		// File doesn't exist - write new
		data := buildDataFileFromSegments(meta, newSegments)
		return writeAviaNZDataFile(path, data)
	default:
		// The file may exist (e.g. permission problem); never overwrite blindly.
		return fmt.Errorf("cannot stat %s: %w (refusing to clobber)", path, statErr)
	}

	existing, err := utils.ParseDataFile(path)
	if err != nil {
		return fmt.Errorf("cannot parse existing %s: %w (refusing to clobber)", path, err)
	}

	// Check for duplicate filter
	for _, seg := range existing.Segments {
		if seg.HasFilterLabel(filter) {
			return fmt.Errorf("%s already contains filter '%s' (refusing to clobber)", path, filter)
		}
	}

	// Append new segments (different filter - safe to merge)
	for _, newSeg := range newSegments {
		existing.Segments = append(existing.Segments, convertAviaNZSegment(newSeg, filter))
	}

	// Sort by start time
	sort.Slice(existing.Segments, func(i, j int) bool {
		return existing.Segments[i].StartTime < existing.Segments[j].StartTime
	})

	return existing.Write(path)
}

// convertAviaNZSegment turns an AviaNZSegment ([start, end, freq_low,
// freq_high, labels]) into a *utils.Segment. Every label's filter is set to the
// filter argument. Frequency bounds may be int or float64; any other type
// becomes 0. The start, end and labels elements are type-asserted without a
// check, so an unexpected type panics.
func convertAviaNZSegment(seg AviaNZSegment, filter string) *utils.Segment {
	src := seg[4].([]AviaNZLabel)
	converted := make([]*utils.Label, len(src))
	for i := range src {
		converted[i] = &utils.Label{
			Species:   src[i].Species,
			Certainty: src[i].Certainty,
			Filter:    filter,
		}
	}

	// Frequencies arrive as int or float64 depending on who built the segment.
	asFloat := func(v any) float64 {
		switch n := v.(type) {
		case int:
			return float64(n)
		case float64:
			return n
		}
		return 0
	}
	lo := asFloat(seg[2])
	hi := asFloat(seg[3])

	return &utils.Segment{
		StartTime: seg[0].(float64),
		EndTime:   seg[1].(float64),
		FreqLow:   lo,
		FreqHigh:  hi,
		Labels:    converted,
	}
}

// buildDataFileFromSegments lays out a .data document as a flat list: the
// metadata first, then each segment in order.
func buildDataFileFromSegments(meta AviaNZMeta, segments []AviaNZSegment) []any {
	doc := make([]any, 1, len(segments)+1)
	doc[0] = meta
	for i := range segments {
		doc = append(doc, segments[i])
	}
	return doc
}

// ParseFilterFromFilename extracts the filter name from a preds CSV filename
// of the form "<prefix>_<filter>_<date>.csv", e.g.
// "predsST_opensoundscape-kiwi-1.2_2025-11-12.csv" -> "opensoundscape-kiwi-1.2".
// The name (without a trailing ".csv") must contain exactly two underscores;
// otherwise the empty string is returned.
func ParseFilterFromFilename(csvPath string) string {
	stem := strings.TrimSuffix(filepath.Base(csvPath), ".csv")

	// Peel off "<prefix>_" and then "<filter>_"; what is left must hold no
	// further underscore.
	_, rest, ok := strings.Cut(stem, "_")
	if !ok {
		return ""
	}
	middle, tail, ok := strings.Cut(rest, "_")
	if !ok || strings.Contains(tail, "_") {
		return ""
	}
	return middle
}

// clusterStartTimes splits start times into runs of neighbours. A value joins
// the current run when its distance to the previous value is <= gapThreshold;
// otherwise it opens a new run. Input is taken in the order given, and an
// empty input yields nil.
func clusterStartTimes(startTimes []float64, gapThreshold float64) [][]float64 {
	if len(startTimes) == 0 {
		return nil
	}

	groups := [][]float64{{startTimes[0]}}
	for i, t := range startTimes[1:] {
		// startTimes[i] is the value just before t in the original slice.
		if t-startTimes[i] <= gapThreshold {
			last := len(groups) - 1
			groups[last] = append(groups[last], t)
			continue
		}
		groups = append(groups, []float64{t})
	}
	return groups
}
file addition: calls_from_common.go (----------)

[0.67281]

package calls

import (
"fmt"
"os"
"path/filepath"
"sort"
"sync"
"sync/atomic"
)

// CallsFromSourceInput defines the common input for calls-from-source tools.
// callsFromSource uses File when it is non-empty, otherwise Folder; it fails
// when both are empty.
type CallsFromSourceInput struct {
	// Folder is the directory handed to CallSource.FindFiles when File is empty.
	Folder string `json:"folder"`
	// File is a single source file to process; it takes precedence over Folder.
	File string `json:"file"`
	// Delete requests removal of source files once they have been processed.
	Delete bool `json:"delete"`
	ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
}

// CallsFromSourceOutput defines the common output for calls-from-source tools.
type CallsFromSourceOutput struct {
	// Calls holds every call collected from the processed files.
	Calls []ClusteredCall `json:"calls"`
	// TotalCalls is len(Calls).
	TotalCalls int `json:"total_calls"`
	// SpeciesCount tallies calls keyed by their EbirdCode value.
	SpeciesCount map[string]int `json:"species_count"`
	// DataFilesWritten counts files whose processing reported "written".
	DataFilesWritten int `json:"data_files_written"`
	// DataFilesSkipped counts files whose processing reported "skipped".
	DataFilesSkipped int `json:"data_files_skipped"`
	// FilesProcessed counts source files processed without error.
	FilesProcessed int `json:"files_processed"`
	// FilesDeleted counts source files removed because Delete was requested.
	FilesDeleted int `json:"files_deleted"`
	// Filter is set to the CallSource's Name().
	Filter string `json:"filter"`
	// Error carries the failure message when processing failed; nil otherwise.
	Error *string `json:"error,omitempty"`
}

// CallSource abstracts a source of bird call data (Raven, BirdNET, etc.)
// so that callsFromSource can drive file discovery and per-file processing
// without knowing the source format.
type CallSource interface {
	// Name returns the display name (e.g. "Raven", "BirdNET")
	Name() string
	// FindFiles discovers source files in the given folder
	FindFiles(folder string) ([]string, error)
	// ProcessFile processes a single source file and returns calls, write/skip status.
	// cache is the DirCache for the directory containing path.
	ProcessFile(path string, cache *DirCache) (calls []ClusteredCall, written, skipped bool, err error)
}

// callsFromSource is the shared entry point for all call source tools.
//
// It resolves the list of source files (input.File wins over input.Folder),
// rejects an empty selection, and then dispatches to the sequential path for
// fewer than 10 files or the parallel path otherwise. On failure the message
// is both stored in the returned output's Error field and returned as error.
func callsFromSource(src CallSource, input CallsFromSourceInput) (CallsFromSourceOutput, error) {
	var output CallsFromSourceOutput
	output.Filter = src.Name()

	// fail records msg on the output and returns it as the error as well.
	fail := func(msg string) (CallsFromSourceOutput, error) {
		output.Error = &msg
		return output, fmt.Errorf("%s", msg)
	}

	var files []string
	switch {
	case input.File != "":
		files = []string{input.File}
	case input.Folder != "":
		found, err := src.FindFiles(input.Folder)
		if err != nil {
			return fail(fmt.Sprintf("Failed to find %s files: %v", src.Name(), err))
		}
		files = found
	default:
		return fail("Either --folder or --file must be specified")
	}

	if len(files) == 0 {
		return fail(fmt.Sprintf("No %s files found", src.Name()))
	}

	// Large batches go through the worker pool; small ones are not worth the
	// goroutine overhead.
	if len(files) >= 10 {
		return callsFromSourceParallel(src, input, files)
	}
	return callsFromSourceSequential(src, input, files)
}

// callsFromSourceSequential processes source files one at a time (for small batches).
//
// Files are handled in the order given. The first processing or deletion
// failure aborts the run: the returned output then carries only Filter and
// Error, and the same message is returned as the error. On success the calls
// are sorted by file and then start time, and all counters are filled in.
func callsFromSourceSequential(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
	var output CallsFromSourceOutput
	output.Filter = src.Name()

	// Build DirCache once for the folder. Lookups below use filepath.Dir(file),
	// which always yields a cleaned path, so the pre-built cache must be keyed
	// by the cleaned folder as well; otherwise a folder such as "recordings/"
	// or "./recordings" would never match and the directory would be cached twice.
	dirCaches := make(map[string]*DirCache)
	if input.Folder != "" {
		folder := filepath.Clean(input.Folder)
		dirCaches[folder] = NewDirCache(folder)
	}

	speciesCount := make(map[string]int)
	var allCalls []ClusteredCall
	dataFilesWritten := 0
	dataFilesSkipped := 0
	filesProcessed := 0
	filesDeleted := 0

	for _, file := range files {
		// One cache per directory, created lazily for files outside the folder.
		dir := filepath.Dir(file)
		cache := dirCaches[dir]
		if cache == nil {
			cache = NewDirCache(dir)
			dirCaches[dir] = cache
		}

		calls, written, skipped, err := src.ProcessFile(file, cache)
		if err != nil {
			errMsg := fmt.Sprintf("Error processing %s: %v", file, err)
			output.Error = &errMsg
			return output, fmt.Errorf("%s", errMsg)
		}

		if written {
			dataFilesWritten++
		}
		if skipped {
			dataFilesSkipped++
		}

		allCalls = append(allCalls, calls...)
		for _, call := range calls {
			speciesCount[call.EbirdCode]++
		}

		filesProcessed++

		// Delete only when requested and the .data file was actually written,
		// so skipped sources are never removed.
		if input.Delete && written {
			if err := os.Remove(file); err != nil {
				errMsg := fmt.Sprintf("Failed to delete %s: %v", file, err)
				output.Error = &errMsg
				return output, fmt.Errorf("%s", errMsg)
			}
			filesDeleted++
		}

		if input.ProgressHandler != nil {
			input.ProgressHandler(filesProcessed, len(files), filepath.Base(file))
		}
	}

	// Sort all calls by file, then start time
	sort.Slice(allCalls, func(i, j int) bool {
		if allCalls[i].File != allCalls[j].File {
			return allCalls[i].File < allCalls[j].File
		}
		return allCalls[i].StartTime < allCalls[j].StartTime
	})

	output.Calls = allCalls
	output.TotalCalls = len(allCalls)
	output.SpeciesCount = speciesCount
	output.DataFilesWritten = dataFilesWritten
	output.DataFilesSkipped = dataFilesSkipped
	output.FilesProcessed = filesProcessed
	output.FilesDeleted = filesDeleted

	return output, nil
}

// sourceJob represents a single file to process (generic over CallSource).
// It is the unit of work sent to sourceWorker over the jobs channel.
type sourceJob struct {
	// filePath is the source file passed to CallSource.ProcessFile.
	filePath string
}

// sourceResult represents the result of processing a single source file.
// sourceWorker sends values of this type on its parallelResult channel; the
// accessor methods below are what expose the fields to that consumer.
type sourceResult struct {
	path    string          // source file that was processed
	calls   []ClusteredCall // calls returned by ProcessFile
	written bool            // ProcessFile's "written" flag
	skipped bool            // ProcessFile's "skipped" flag
	err     error           // ProcessFile's error, if any
}

// filePath returns the path of the processed source file.
func (r sourceResult) filePath() string {
	return r.path
}

// getCalls returns the calls produced for the file.
func (r sourceResult) getCalls() []ClusteredCall {
	return r.calls
}

// wasWritten reports the "written" flag from processing.
func (r sourceResult) wasWritten() bool {
	return r.written
}

// wasSkipped reports the "skipped" flag from processing.
func (r sourceResult) wasSkipped() bool {
	return r.skipped
}

// getError returns the processing error, or nil.
func (r sourceResult) getError() error {
	return r.err
}

// callsFromSourceParallel processes source files concurrently using a worker pool and DirCache.
//
// DOT_DATA_WORKERS workers drain a pre-filled jobs channel and publish one
// result per file; aggregateResults folds those results into counters and
// reports progress. If any file failed, the first error is returned and the
// output carries only Filter and Error.
func callsFromSourceParallel(src CallSource, input CallsFromSourceInput, files []string) (CallsFromSourceOutput, error) {
	var output CallsFromSourceOutput
	output.Filter = src.Name()

	total := len(files)
	var processed atomic.Int32

	// Build DirCache for the folder. Workers look caches up by
	// filepath.Dir(file), which always yields a cleaned path, so the pre-built
	// cache is stored under the cleaned folder; a raw key such as "recordings/"
	// would never be hit and the directory would be cached a second time.
	dirCaches := &sync.Map{}
	if input.Folder != "" {
		folder := filepath.Clean(input.Folder)
		dirCaches.Store(folder, NewDirCache(folder))
	}

	// Both channels are buffered to the full job count so neither the producer
	// loop below nor the workers can block on a slow consumer.
	jobs := make(chan sourceJob, total)
	results := make(chan parallelResult, total)

	// Start workers
	var wg sync.WaitGroup
	for range DOT_DATA_WORKERS {
		wg.Add(1)
		go sourceWorker(src, dirCaches, jobs, results, &wg)
	}

	// Send jobs
	for _, file := range files {
		jobs <- sourceJob{filePath: file}
	}
	close(jobs)

	// Wait for workers to finish, then close results so the aggregation loop
	// terminates.
	go func() {
		wg.Wait()
		close(results)
	}()

	// Collect results with progress reporting
	stats := aggregateResults(results, total, &processed, input.Delete, input.ProgressHandler)

	if stats.firstErr != nil {
		errMsg := stats.firstErr.Error()
		output.Error = &errMsg
		return output, stats.firstErr
	}

	sortCallsByFileAndTime(stats.calls)

	output.Calls = stats.calls
	output.TotalCalls = len(stats.calls)
	output.SpeciesCount = stats.speciesCount
	output.DataFilesWritten = stats.dataFilesWritten
	output.DataFilesSkipped = stats.dataFilesSkipped
	output.FilesProcessed = stats.filesProcessed
	output.FilesDeleted = stats.filesDeleted

	return output, nil
}

// sourceWorker processes source files from the jobs channel until it is
// closed, sending exactly one sourceResult per job on results. It calls
// wg.Done when it exits.
//
// dirCaches maps a directory path to its *DirCache and is shared by all
// workers.
func sourceWorker(src CallSource, dirCaches *sync.Map, jobs <-chan sourceJob, results chan<- parallelResult, wg *sync.WaitGroup) {
	defer wg.Done()

	for job := range jobs {
		dir := filepath.Dir(job.filePath)

		// Get or create DirCache for this directory. On a miss the new cache
		// is published with LoadOrStore rather than Store: if another worker
		// raced us on the same directory, both end up using whichever instance
		// was stored first instead of each keeping a private copy.
		cached, ok := dirCaches.Load(dir)
		if !ok {
			cached, _ = dirCaches.LoadOrStore(dir, NewDirCache(dir))
		}
		cache := cached.(*DirCache)

		calls, written, skipped, err := src.ProcessFile(job.filePath, cache)
		results <- sourceResult{
			path:    job.filePath,
			calls:   calls,
			written: written,
			skipped: skipped,
			err:     err,
		}
	}
}
file addition: calls_from_birda_raven_test.go (----------)

[0.67281]

package calls

import (
"os"
"path/filepath"
"testing"

"skraak/utils"
)

// ============================================
// BirdNET Tests
// ============================================

// TestCallsFromBirda_NewDataFile checks that a single BirdNET results file
// with no pre-existing .data file produces a new .data file next to the WAV,
// containing one segment labelled with the "BirdNET" filter and a certainty
// derived from the CSV confidence.
func TestCallsFromBirda_NewDataFile(t *testing.T) {
	tmpDir := t.TempDir()

	// Create a minimal WAV file
	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)

	// Create BirdNET results file. The File column points at a path that does
	// not exist, so the WAV has to be located from the results file name.
	birdaPath := filepath.Join(tmpDir, "test.BirdNET.results.csv")
	birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Turdus migratorius,American Robin,0.85,/some/path/test.WAV\n"
	if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromBirdaInput{
		File: birdaPath,
	}

	output, err := CallsFromBirda(input)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "BirdNET" {
		t.Errorf("expected filter 'BirdNET', got '%s'", output.Filter)
	}
	if output.TotalCalls != 1 {
		t.Errorf("expected 1 call, got %d", output.TotalCalls)
	}

	// Verify .data file was created
	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	// Fatal rather than Error: the assertions below index into the segment and
	// its labels and would panic on an empty slice.
	if len(df.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df.Segments))
	}
	if len(df.Segments[0].Labels) == 0 {
		t.Fatal("expected at least one label on the segment")
	}
	label := df.Segments[0].Labels[0]
	if label.Filter != "BirdNET" {
		t.Errorf("expected filter 'BirdNET', got '%s'", label.Filter)
	}
	if label.Certainty != 85 {
		t.Errorf("expected certainty 85, got %d", label.Certainty)
	}
}

// TestCallsFromBirda_ExistingSameFilter expects an error when the target
// .data file already contains a label whose filter is "BirdNET".
func TestCallsFromBirda_ExistingSameFilter(t *testing.T) {
	dir := t.TempDir()

	recording := filepath.Join(dir, "test.WAV")
	createMinimalWAV(t, recording, 16000, 60.0)

	// Seed a .data file that already carries a BirdNET-filtered label.
	seeded := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing Bird", "certainty": 90, "filter": "BirdNET"}]]]`
	if err := os.WriteFile(recording+".data", []byte(seeded), 0644); err != nil {
		t.Fatal(err)
	}

	results := filepath.Join(dir, "test.BirdNET.results.csv")
	csvBody := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,New Bird,New Bird,0.85,test.WAV\n"
	if err := os.WriteFile(results, []byte(csvBody), 0644); err != nil {
		t.Fatal(err)
	}

	output, err := CallsFromBirda(CallsFromBirdaInput{File: results})

	if err == nil {
		t.Error("expected error for same filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
}

// TestCallsFromBirda_ExistingDifferentFilter expects BirdNET detections to be
// merged into a .data file whose existing label uses a different filter
// ("Manual"), leaving two segments in the file.
func TestCallsFromBirda_ExistingDifferentFilter(t *testing.T) {
	dir := t.TempDir()

	recording := filepath.Join(dir, "test.WAV")
	createMinimalWAV(t, recording, 16000, 60.0)

	dotData := recording + ".data"
	seeded := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "Manual"}]]]`
	if err := os.WriteFile(dotData, []byte(seeded), 0644); err != nil {
		t.Fatal(err)
	}

	results := filepath.Join(dir, "test.BirdNET.results.csv")
	csvBody := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n"
	if err := os.WriteFile(results, []byte(csvBody), 0644); err != nil {
		t.Fatal(err)
	}

	output, err := CallsFromBirda(CallsFromBirdaInput{File: results})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := output.DataFilesWritten; got != 1 {
		t.Errorf("expected 1 data file written, got %d", got)
	}

	// Re-read the merged file from disk.
	merged, err := utils.ParseDataFile(dotData)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if got := len(merged.Segments); got != 2 {
		t.Errorf("expected 2 segments after merge, got %d", got)
	}
}

// TestCallsFromBirda_DeleteOption expects the BirdNET results file to be
// removed from disk, and counted in FilesDeleted, when Delete is set.
func TestCallsFromBirda_DeleteOption(t *testing.T) {
	dir := t.TempDir()

	recording := filepath.Join(dir, "test.WAV")
	createMinimalWAV(t, recording, 16000, 60.0)

	results := filepath.Join(dir, "test.BirdNET.results.csv")
	csvBody := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Robin,Robin,0.85,test.WAV\n"
	if err := os.WriteFile(results, []byte(csvBody), 0644); err != nil {
		t.Fatal(err)
	}

	output, err := CallsFromBirda(CallsFromBirdaInput{File: results, Delete: true})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := output.FilesDeleted; got != 1 {
		t.Errorf("expected 1 file deleted, got %d", got)
	}

	// The source file itself must be gone.
	_, statErr := os.Stat(results)
	if !os.IsNotExist(statErr) {
		t.Error("expected BirdNET file to be deleted")
	}
}

func TestCallsFromBirda_FolderMode(t *testing.T) {
tmpDir := t.TempDir()

for i := range 2 {
wavPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".WAV")
createMinimalWAV(t, wavPath, 16000, 60.0)

birdaPath := filepath.Join(tmpDir, "test"+string(rune('0'+i))+".BirdNET.results.csv")
birdaContent := "\ufeffStart (s),End (s),Scientific name,Common name,Confidence,File\n0.0,3.0,Bird,Bird,0.85,test.WAV\n"
if err := os.WriteFile(birdaPath, []byte(birdaContent), 0644); err != nil {
t.Fatal(err)
}
}

input := CallsFromBirdaInput{Folder: tmpDir}
output, err := CallsFromBirda(input)

if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if output.FilesProcessed != 2 {
t.Errorf("expected 2 files processed, got %d", output.FilesProcessed)
}
if output.DataFilesWritten != 2 {
t.Errorf("expected 2 data files written, got %d", output.DataFilesWritten)
}
}

// ============================================
// Raven Tests
// ============================================

// TestCallsFromRaven_NewDataFile checks that a Raven selection table with no
// pre-existing .data file produces a new .data file whose first segment
// carries the frequency bounds from the selection.
func TestCallsFromRaven_NewDataFile(t *testing.T) {
	tmpDir := t.TempDir()

	wavPath := filepath.Join(tmpDir, "test.WAV")
	createMinimalWAV(t, wavPath, 16000, 60.0)

	ravenPath := filepath.Join(tmpDir, "test.Table.1.selections.txt")
	ravenContent := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
	if err := os.WriteFile(ravenPath, []byte(ravenContent), 0644); err != nil {
		t.Fatal(err)
	}

	input := CallsFromRavenInput{File: ravenPath}
	output, err := CallsFromRaven(input)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if output.DataFilesWritten != 1 {
		t.Errorf("expected 1 data file written, got %d", output.DataFilesWritten)
	}
	if output.Filter != "Raven" {
		t.Errorf("expected filter 'Raven', got '%s'", output.Filter)
	}

	dataPath := wavPath + ".data"
	df, err := utils.ParseDataFile(dataPath)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	// Guard the index below: an empty file should fail the test, not panic it.
	if len(df.Segments) == 0 {
		t.Fatal("expected at least one segment in .data file")
	}
	first := df.Segments[0]
	if first.FreqLow != 1000 {
		t.Errorf("expected freq_low 1000, got %f", first.FreqLow)
	}
	if first.FreqHigh != 5000 {
		t.Errorf("expected freq_high 5000, got %f", first.FreqHigh)
	}
}

// TestCallsFromRaven_ExistingSameFilter expects an error when the target
// .data file already contains a label whose filter is "Raven".
func TestCallsFromRaven_ExistingSameFilter(t *testing.T) {
	dir := t.TempDir()

	recording := filepath.Join(dir, "test.WAV")
	createMinimalWAV(t, recording, 16000, 60.0)

	// Seed a .data file that already carries a Raven-filtered label.
	seeded := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Existing", "certainty": 90, "filter": "Raven"}]]]`
	if err := os.WriteFile(recording+".data", []byte(seeded), 0644); err != nil {
		t.Fatal(err)
	}

	selections := filepath.Join(dir, "test.Table.1.selections.txt")
	table := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tNew\n"
	if err := os.WriteFile(selections, []byte(table), 0644); err != nil {
		t.Fatal(err)
	}

	output, err := CallsFromRaven(CallsFromRavenInput{File: selections})

	if err == nil {
		t.Error("expected error for same filter, got nil")
	}
	if output.Error == nil {
		t.Error("expected error message in output")
	}
}

// TestCallsFromRaven_ExistingDifferentFilter expects Raven selections to be
// merged into a .data file whose existing label uses a different filter
// ("BirdNET"), leaving two segments in the file.
func TestCallsFromRaven_ExistingDifferentFilter(t *testing.T) {
	dir := t.TempDir()

	recording := filepath.Join(dir, "test.WAV")
	createMinimalWAV(t, recording, 16000, 60.0)

	dotData := recording + ".data"
	seeded := `[{"Operator": "Test", "Duration": 60.0}, [5.0, 10.0, 0, 16000, [{"species": "Kiwi", "certainty": 90, "filter": "BirdNET"}]]]`
	if err := os.WriteFile(dotData, []byte(seeded), 0644); err != nil {
		t.Fatal(err)
	}

	selections := filepath.Join(dir, "test.Table.1.selections.txt")
	table := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tMorepork\n"
	if err := os.WriteFile(selections, []byte(table), 0644); err != nil {
		t.Fatal(err)
	}

	output, err := CallsFromRaven(CallsFromRavenInput{File: selections})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := output.DataFilesWritten; got != 1 {
		t.Errorf("expected 1 data file written, got %d", got)
	}

	// Re-read the merged file from disk.
	merged, err := utils.ParseDataFile(dotData)
	if err != nil {
		t.Fatalf("failed to parse .data file: %v", err)
	}
	if got := len(merged.Segments); got != 2 {
		t.Errorf("expected 2 segments after merge, got %d", got)
	}
}

// TestCallsFromRaven_DeleteOption expects the Raven selection file to be
// removed from disk, and counted in FilesDeleted, when Delete is set.
func TestCallsFromRaven_DeleteOption(t *testing.T) {
	dir := t.TempDir()

	recording := filepath.Join(dir, "test.WAV")
	createMinimalWAV(t, recording, 16000, 60.0)

	selections := filepath.Join(dir, "test.Table.1.selections.txt")
	table := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n"
	if err := os.WriteFile(selections, []byte(table), 0644); err != nil {
		t.Fatal(err)
	}

	output, err := CallsFromRaven(CallsFromRavenInput{File: selections, Delete: true})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := output.FilesDeleted; got != 1 {
		t.Errorf("expected 1 file deleted, got %d", got)
	}

	// The source file itself must be gone.
	_, statErr := os.Stat(selections)
	if !os.IsNotExist(statErr) {
		t.Error("expected Raven file to be deleted")
	}
}

// TestCallsFromRaven_MultipleSelections expects one call per selection row
// and a per-species tally of one for each of the three species in the table.
func TestCallsFromRaven_MultipleSelections(t *testing.T) {
	dir := t.TempDir()

	recording := filepath.Join(dir, "test.WAV")
	createMinimalWAV(t, recording, 16000, 60.0)

	selections := filepath.Join(dir, "test.Table.1.selections.txt")
	table := "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n1\tSpectrogram 1\t1\t0.0\t5.0\t1000\t5000\tKiwi\n2\tSpectrogram 1\t1\t10.0\t15.0\t2000\t6000\tMorepork\n3\tSpectrogram 1\t1\t20.0\t25.0\t1500\t4500\tTui\n"
	if err := os.WriteFile(selections, []byte(table), 0644); err != nil {
		t.Fatal(err)
	}

	output, err := CallsFromRaven(CallsFromRavenInput{File: selections})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := output.TotalCalls; got != 3 {
		t.Errorf("expected 3 calls, got %d", got)
	}

	tally := output.SpeciesCount
	allOnce := tally["Kiwi"] == 1 && tally["Morepork"] == 1 && tally["Tui"] == 1
	if !allOnce {
		t.Errorf("unexpected species count: %v", output.SpeciesCount)
	}
}
file addition: calls_from_birda.go (----------)

[0.67281]

package calls

import (
"encoding/csv"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"

"skraak/utils"
)

// CallsFromBirdaInput defines the input for the calls-from-birda tool.
// CallsFromBirda converts it directly to CallsFromSourceInput, so the field
// set, order and types must stay identical to that struct.
type CallsFromBirdaInput struct {
	// Folder is searched for "*.BirdNET.results.csv" files when File is empty.
	Folder string `json:"folder"`
	// File is a single BirdNET results file to process.
	File string `json:"file"`
	// Delete requests removal of results files once they have been processed.
	Delete bool `json:"delete"`
	ProgressHandler ProgressHandler `json:"-"` // Optional progress callback
}

// CallsFromBirdaOutput defines the output for the calls-from-birda tool.
// Every field is copied from the corresponding CallsFromSourceOutput field.
type CallsFromBirdaOutput struct {
	// Calls holds one entry per BirdNET detection that was processed.
	Calls []ClusteredCall `json:"calls"`
	TotalCalls int `json:"total_calls"`
	SpeciesCount map[string]int `json:"species_count"`
	DataFilesWritten int `json:"data_files_written"`
	DataFilesSkipped int `json:"data_files_skipped"`
	FilesProcessed int `json:"files_processed"`
	FilesDeleted int `json:"files_deleted"`
	// Filter is the source name, "BirdNET".
	Filter string `json:"filter"`
	// Error carries the failure message when processing failed; nil otherwise.
	Error *string `json:"error,omitempty"`
}

// birdaSource implements CallSource for BirdNET results files.
type birdaSource struct{}

// Name reports the display name of this source.
func (birdaSource) Name() string {
	return "BirdNET"
}

// FindFiles returns the paths of all entries directly inside folder whose
// name ends in ".BirdNET.results.csv". Sub-directories are not descended
// into. The result is nil when nothing matches.
func (birdaSource) FindFiles(folder string) ([]string, error) {
	entries, err := os.ReadDir(folder)
	if err != nil {
		return nil, err
	}

	var matches []string
	for i := range entries {
		if name := entries[i].Name(); strings.HasSuffix(name, ".BirdNET.results.csv") {
			matches = append(matches, filepath.Join(folder, name))
		}
	}
	return matches, nil
}

// ProcessFile handles one BirdNET results file by delegating to
// processBirdaFileCached, passing its four results through unchanged.
func (birdaSource) ProcessFile(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	calls, written, skipped, err := processBirdaFileCached(birdaFile, cache)
	return calls, written, skipped, err
}

// CallsFromBirda processes BirdNET results files and writes .data files.
//
// It is a thin adapter around callsFromSource: the input is converted to the
// shared input type, the BirdNET source is plugged in, and the shared output
// is converted back to the Birda-specific type. The conversion copies every
// field; the two output structs have identical layouts. The error from
// callsFromSource is returned as is.
func CallsFromBirda(input CallsFromBirdaInput) (CallsFromBirdaOutput, error) {
	shared, err := callsFromSource(birdaSource{}, CallsFromSourceInput(input))
	return CallsFromBirdaOutput(shared), err
}

// BirdNETDetection represents a single BirdNET detection, i.e. one data row
// of a BirdNET results CSV.
type BirdNETDetection struct {
	// StartTime is parsed from the "Start (s)" column.
	StartTime float64
	// EndTime is parsed from the "End (s)" column.
	EndTime float64
	// ScientificName is not populated by readBirdaDetections.
	ScientificName string
	// CommonName is taken from the "Common name" column.
	CommonName string
	// Confidence is parsed from the "Confidence" column; buildBirdNETSegments
	// treats it as a 0.0-1.0 value.
	Confidence float64
	// WAVPath is taken from the "File" column when present; otherwise empty.
	WAVPath string
}

// birdaColumnIndices holds the parsed column positions from a BirdNET CSV header.
// A value of -1 means the column was not present.
type birdaColumnIndices struct {
	startIdx      int
	endIdx        int
	commonNameIdx int
	confidenceIdx int
	fileIdx       int
}

// parseBirdaCSVHeader consumes the header row from reader and maps the known
// column titles to their positions.
//
// A UTF-8 byte-order mark in front of a title is ignored. "Start (s)",
// "End (s)", "Common name" and "Confidence" are mandatory; "File" is optional
// and reported as -1 when absent. On any failure the zero birdaColumnIndices
// is returned together with the error.
func parseBirdaCSVHeader(reader *csv.Reader) (birdaColumnIndices, error) {
	header, err := reader.Read()
	if err != nil {
		return birdaColumnIndices{}, fmt.Errorf("failed to read header: %w", err)
	}

	cols := birdaColumnIndices{
		startIdx:      -1,
		endIdx:        -1,
		commonNameIdx: -1,
		confidenceIdx: -1,
		fileIdx:       -1,
	}

	// Title -> destination field. Unknown titles are simply not in the table.
	slots := map[string]*int{
		"Start (s)":   &cols.startIdx,
		"End (s)":     &cols.endIdx,
		"Common name": &cols.commonNameIdx,
		"Confidence":  &cols.confidenceIdx,
		"File":        &cols.fileIdx,
	}
	for pos, title := range header {
		if slot, known := slots[strings.TrimPrefix(title, "\ufeff")]; known {
			*slot = pos
		}
	}

	for _, required := range []int{cols.startIdx, cols.endIdx, cols.commonNameIdx, cols.confidenceIdx} {
		if required == -1 {
			return birdaColumnIndices{}, fmt.Errorf("missing required columns in BirdNET file")
		}
	}
	return cols, nil
}

// readBirdaDetections drains reader and turns every remaining record into a
// BirdNETDetection, using idx to locate the columns.
//
// Start time, end time and confidence must parse as floats; the first record
// that fails to read or parse aborts the whole read and nil is returned with
// the error. The WAV path is filled in only when the header had a "File"
// column and the record reaches that far.
func readBirdaDetections(reader *csv.Reader, idx birdaColumnIndices) ([]BirdNETDetection, error) {
	var detections []BirdNETDetection
	for {
		record, err := reader.Read()
		if err != nil {
			if err == io.EOF {
				return detections, nil
			}
			return nil, fmt.Errorf("failed to read record: %w", err)
		}

		rawStart := record[idx.startIdx]
		start, err := strconv.ParseFloat(rawStart, 64)
		if err != nil {
			return nil, fmt.Errorf("failed to parse start time %q: %w", rawStart, err)
		}

		rawEnd := record[idx.endIdx]
		end, err := strconv.ParseFloat(rawEnd, 64)
		if err != nil {
			return nil, fmt.Errorf("failed to parse end time %q: %w", rawEnd, err)
		}

		rawConfidence := record[idx.confidenceIdx]
		confidence, err := strconv.ParseFloat(rawConfidence, 64)
		if err != nil {
			return nil, fmt.Errorf("failed to parse confidence %q: %w", rawConfidence, err)
		}

		det := BirdNETDetection{
			StartTime:  start,
			EndTime:    end,
			CommonName: record[idx.commonNameIdx],
			Confidence: confidence,
		}
		if idx.fileIdx >= 0 && idx.fileIdx < len(record) {
			det.WAVPath = record[idx.fileIdx]
		}

		detections = append(detections, det)
	}
}

// resolveBirdaWAVPath finds the WAV file associated with a BirdNET results file.
//
// firstWAVPath (the path recorded in the CSV) is returned as is when it is
// non-empty and os.Stat succeeds on it. Otherwise the results file name,
// minus its ".BirdNET.results.csv" suffix, is looked up through cache when
// one is supplied, or through findWAVFile in the results file's directory
// when cache is nil.
func resolveBirdaWAVPath(birdaFile string, firstWAVPath string, cache *DirCache) string {
	if firstWAVPath != "" {
		if _, statErr := os.Stat(firstWAVPath); statErr == nil {
			return firstWAVPath
		}
	}

	stem := strings.TrimSuffix(filepath.Base(birdaFile), ".BirdNET.results.csv")

	if cache == nil {
		return findWAVFile(filepath.Dir(birdaFile), stem)
	}
	return cache.FindWAV(stem)
}

// processBirdaFileCached processes a single BirdNET results file using a DirCache for WAV lookup.
//
// Returns (calls, written, skipped, err):
//   - open, header, record and .data write failures are returned as errors;
//   - a file with no detections, no resolvable WAV, or an unreadable WAV
//     header is reported as skipped without an error;
//   - otherwise the detections are written to "<wav>.data" and one
//     ClusteredCall per detection is returned with written == true.
func processBirdaFileCached(birdaFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {
	src, err := os.Open(birdaFile)
	if err != nil {
		return nil, false, false, fmt.Errorf("failed to open file: %w", err)
	}
	// Read-only handle: a close error carries no information worth reporting.
	defer func() { _ = src.Close() }()

	reader := csv.NewReader(src)

	cols, err := parseBirdaCSVHeader(reader)
	if err != nil {
		return nil, false, false, err
	}

	detections, err := readBirdaDetections(reader, cols)
	if err != nil {
		return nil, false, false, err
	}
	if len(detections) == 0 {
		return nil, false, true, nil
	}

	// The first detection's File column is the only path hint that is tried.
	wavPath := resolveBirdaWAVPath(birdaFile, detections[0].WAVPath, cache)
	if wavPath == "" {
		return nil, false, true, nil
	}

	sampleRate, duration, err := utils.ParseWAVHeaderMinimal(wavPath)
	if err != nil {
		return nil, false, true, nil
	}

	reviewer := "None"
	meta := AviaNZMeta{Operator: "BirdNET", Duration: duration, Reviewer: &reviewer}
	segments := buildBirdNETSegments(detections, sampleRate)

	if err := writeDotDataFileSafe(wavPath+".data", segments, "BirdNET", meta); err != nil {
		return nil, false, false, err
	}

	calls := make([]ClusteredCall, len(detections))
	for i, det := range detections {
		calls[i] = ClusteredCall{
			File:      wavPath,
			StartTime: det.StartTime,
			EndTime:   det.EndTime,
			EbirdCode: det.CommonName,
			Segments:  1,
		}
	}

	return calls, true, false, nil
}

// buildBirdNETSegments converts BirdNET detections to AviaNZ segments, one
// segment per detection and in the same order.
//
// Each segment spans the detection's start/end time, uses 0 as the low
// frequency and sampleRate as the high frequency, and carries a single label
// with the common name, the "BirdNET" filter and a certainty obtained by
// truncating confidence*100 and clamping it to 0..100. No detections yields a
// nil slice.
func buildBirdNETSegments(detections []BirdNETDetection, sampleRate int) []AviaNZSegment {
	var out []AviaNZSegment

	for i := range detections {
		det := detections[i]

		// Convert confidence (0.0-1.0) to certainty (0-100)
		certainty := int(det.Confidence * 100)
		certainty = max(certainty, 0)
		certainty = min(certainty, 100)

		label := AviaNZLabel{
			Species:   det.CommonName,
			Certainty: certainty,
			Filter:    "BirdNET",
		}

		out = append(out, AviaNZSegment{
			det.StartTime,
			det.EndTime,
			0,          // freq_low
			sampleRate, // freq_high (full band)
			[]AviaNZLabel{label},
		})
	}

	return out
}
file addition: calls_detect_anomalies_test.go (----------)

[0.67281]

package calls

import (
"os"
"path/filepath"
"testing"
)

// TestDetectAnomalies_LabelMismatch checks that two models labelling the same
// time range with different calltypes are reported as exactly one
// "label_mismatch" anomaly and no certainty mismatch.
func TestDetectAnomalies_LabelMismatch(t *testing.T) {
	dir := t.TempDir()

	// Same time range, different calltypes across two models
	data := `[{"Operator":"test"},` +
		`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
		`{"species":"Kiwi","calltype":"Male","certainty":100,"filter":"model-b"}]]]`
	if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
		t.Fatal(err)
	}

	out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
	if err != nil {
		t.Fatal(err)
	}
	if out.LabelMismatches != 1 {
		t.Errorf("expected 1 label mismatch, got %d", out.LabelMismatches)
	}
	if out.CertaintyMismatches != 0 {
		t.Errorf("expected 0 certainty mismatches, got %d", out.CertaintyMismatches)
	}
	// Guard the index below: a missing anomaly should fail the test, not panic it.
	if len(out.Anomalies) == 0 {
		t.Fatal("expected at least one anomaly in output")
	}
	if out.Anomalies[0].Type != "label_mismatch" {
		t.Errorf("expected label_mismatch, got %s", out.Anomalies[0].Type)
	}
}

func TestDetectAnomalies_CertaintyMismatch(t *testing.T) {
dir := t.TempDir()

// Same time range, same labels, different certainty
data := `[{"Operator":"test"},` +
`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":90,"filter":"model-a"},` +
`{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
t.Fatal(err)
}

out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
if err != nil {
t.Fatal(err)
}
if out.CertaintyMismatches != 1 {
t.Errorf("expected 1 certainty mismatch, got %d", out.CertaintyMismatches)
}
if out.LabelMismatches != 0 {
t.Errorf("expected 0 label mismatches, got %d", out.LabelMismatches)
}
}

func TestDetectAnomalies_NoAnomalyWhenAgreement(t *testing.T) {
dir := t.TempDir()

data := `[{"Operator":"test"},` +
`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +
`{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`
if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
t.Fatal(err)
}

out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
if err != nil {
t.Fatal(err)
}
if out.AnomaliesTotal != 0 {
t.Errorf("expected 0 anomalies, got %d", out.AnomaliesTotal)
}
}

func TestDetectAnomalies_LonelySegmentSkipped(t *testing.T) {
dir := t.TempDir()

// model-a has a segment, model-b has no segment in this file
data := `[{"Operator":"test"},` +
`[0,10,100,1000,[{"species":"Kiwi","certainty":100,"filter":"model-a"}]]]`
if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {
t.Fatal(err)
}

out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})
if err != nil {
t.Fatal(err)
}
if out.AnomaliesTotal != 0 {
t.Errorf("lonely segment should be skipped, got %d anomalies", out.AnomaliesTotal)
}
if out.FilesWithAllModels != 0 {
t.Errorf("file missing a model should not count as FilesWithAllModels")
}
}

func TestDetectAnomalies_FailsWithOneModel(t *testing.T) {
dir := t.TempDir()
_, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a"}})
if err == nil {
t.Error("expected error with only 1 model")
}
}
file addition: calls_detect_anomalies.go (----------)

[0.67281]

package calls

import (
"fmt"
"os"
"path/filepath"

"skraak/utils"
)

// DetectAnomaliesInput holds the parameters for DetectAnomalies.
type DetectAnomaliesInput struct {
	// Folder is the directory searched for .data files; it must exist and be
	// a directory.
	Folder string
	Models []string // at least 2 filter names
	Species []string // optional scope; empty = all species
}

// DetectAnomaliesOutput is the report produced by DetectAnomalies.
type DetectAnomaliesOutput struct {
	// Folder is the cleaned form of the input folder.
	Folder string `json:"folder"`
	// Models echoes the model filter names from the input.
	Models []string `json:"models"`
	// FilesExamined counts .data files that parsed successfully.
	FilesExamined int `json:"files_examined"`
	// FilesWithAllModels counts examined files for which the per-file
	// comparison returned a non-nil result.
	FilesWithAllModels int `json:"files_with_all_models"`
	// AnomaliesTotal is len(Anomalies).
	AnomaliesTotal int `json:"anomalies_total"`
	// LabelMismatches counts anomalies whose Type is "label_mismatch".
	LabelMismatches int `json:"label_mismatches"`
	// CertaintyMismatches counts all other anomalies.
	CertaintyMismatches int `json:"certainty_mismatches"`
	Anomalies []Anomaly `json:"anomalies,omitempty"`
	// Error holds the failure message when validation or file listing failed.
	Error string `json:"error,omitempty"`
}

// Anomaly describes one disagreement between models found in a .data file.
type Anomaly struct {
	File string `json:"file"`
	Type string `json:"type"` // "label_mismatch" | "certainty_mismatch"
	// Segments lists the per-model segments involved in the disagreement.
	Segments []AnomalySegment `json:"segments"`
}

// AnomalySegment is one model's view of a segment taking part in an Anomaly:
// which model produced it, its time range, and the label it assigned.
type AnomalySegment struct {
	Model string `json:"model"`
	Start float64 `json:"start"`
	End float64 `json:"end"`
	Species string `json:"species"`
	CallType string `json:"calltype,omitempty"`
	Certainty int `json:"certainty"`
}

// validateAnomalyInput validates the input parameters for DetectAnomalies.
// It requires at least two distinct model names and a Folder that exists and
// is a directory, and returns a descriptive error for the first violated rule.
func validateAnomalyInput(input DetectAnomaliesInput) error {
	if len(input.Models) < 2 {
		return fmt.Errorf("at least 2 --model values required")
	}
	// Each unordered pair only needs to be compared once.
	for i, a := range input.Models {
		for _, b := range input.Models[i+1:] {
			if a == b {
				return fmt.Errorf("duplicate --model values are not allowed")
			}
		}
	}

	info, err := os.Stat(input.Folder)
	if err != nil {
		return fmt.Errorf("folder not found: %s", input.Folder)
	}
	if !info.IsDir() {
		return fmt.Errorf("not a directory: %s", input.Folder)
	}

	return nil
}

// DetectAnomalies compares corresponding segments across multiple ML model filters
// within each .data file. Segments are matched by time overlap (same logic as propagate).
// Lonely segments (no overlap in one or more models) are silently skipped.
// Anomalies are flagged when overlapping segments disagree on species+calltype,
// or when labels match but certainty values differ.
//
// Invalid input and a failure to list the folder's .data files are returned
// as errors, with the same message stored in the output's Error field.
// Individual .data files that fail to parse are skipped and do not count
// towards FilesExamined.
func DetectAnomalies(input DetectAnomaliesInput) (DetectAnomaliesOutput, error) {
	folder := filepath.Clean(input.Folder)
	output := DetectAnomaliesOutput{
		Folder: folder,
		Models: input.Models,
	}

	if err := validateAnomalyInput(input); err != nil {
		output.Error = err.Error()
		return output, err
	}

	files, err := utils.FindDataFiles(folder)
	if err != nil {
		output.Error = fmt.Sprintf("list .data files: %v", err)
		return output, fmt.Errorf("%s", output.Error)
	}

	// Optional species scope, turned into a set for the per-segment check.
	scopeSet := make(map[string]bool, len(input.Species))
	for _, s := range input.Species {
		scopeSet[s] = true
	}

	for _, path := range files {
		df, err := utils.ParseDataFile(path)
		if err != nil {
			// Best effort: an unreadable file must not abort the whole scan.
			continue
		}
		output.FilesExamined++

		anomalies := detectAnomaliesInFile(df, path, input.Models, scopeSet)
		if anomalies == nil {
			// file didn't have all models present
			continue
		}
		output.FilesWithAllModels++
		for _, a := range anomalies {
			if a.Type == "label_mismatch" {
				output.LabelMismatches++
			} else {
				output.CertaintyMismatches++
			}
		}
		output.Anomalies = append(output.Anomalies, anomalies...)
	}
	output.AnomaliesTotal = len(output.Anomalies)
	return output, nil
}

// labeledSeg pairs a segment with the specific label matching the model filter.
// A segment carrying labels for several requested models yields one labeledSeg
// per matching label.
type labeledSeg struct {
seg *utils.Segment // segment that carries the label
label *utils.Label // label whose Filter equals a requested model name
}

// detectAnomaliesInFile compares, for one parsed .data file, every in-scope
// segment of the first model against the overlapping segments of the others.
//
// It returns nil if the file doesn't contain all required models. When all
// models are present it returns a non-nil slice, which is empty if nothing was
// flagged, so callers can tell "model missing" apart from "no anomalies".
func detectAnomaliesInFile(df *utils.DataFile, path string, models []string, scope map[string]bool) []Anomaly {
	modelSegs := collectModelSegments(df, models)

	// Skip file if any model is entirely absent.
	for _, model := range models {
		if len(modelSegs[model]) == 0 {
			return nil
		}
	}

	// Deliberately non-nil: nil is reserved for the "model missing" case above.
	anomalies := []Anomaly{}
	for _, anchor := range modelSegs[models[0]] {
		if !inScope(anchor, scope) {
			continue
		}
		matches := findOverlappingMatches(anchor, models, modelSegs)
		if matches == nil {
			// Lonely anchor: some model has no overlapping segment.
			continue
		}
		group := buildComparisonGroup(anchor, models, matches)
		if a := checkGroupAnomaly(group, path, models); a != nil {
			anomalies = append(anomalies, *a)
		}
	}
	return anomalies
}

// collectModelSegments buckets every label whose Filter is one of the
// requested models under that model's name, keeping the owning segment
// alongside it. Models with no matching label get no map entry.
func collectModelSegments(df *utils.DataFile, models []string) map[string][]labeledSeg {
	wanted := make(map[string]struct{}, len(models))
	for _, name := range models {
		wanted[name] = struct{}{}
	}

	byModel := make(map[string][]labeledSeg, len(models))
	for _, segment := range df.Segments {
		for _, label := range segment.Labels {
			if _, ok := wanted[label.Filter]; !ok {
				continue
			}
			byModel[label.Filter] = append(byModel[label.Filter], labeledSeg{seg: segment, label: label})
		}
	}
	return byModel
}

// inScope reports whether the anchor passes the species scope filter.
// An empty scope admits everything. Otherwise the anchor is admitted when the
// scope lists its species, or lists "species+calltype" for a non-empty call type.
func inScope(anchor labeledSeg, scope map[string]bool) bool {
	if len(scope) == 0 {
		return true
	}
	species := anchor.label.Species
	if scope[species] {
		return true
	}
	if callType := anchor.label.CallType; callType != "" {
		return scope[species+"+"+callType]
	}
	return false
}

// findOverlappingMatches gathers, for every model after the first, the
// segments that overlap the anchor in time. The result is keyed by model name.
// It returns nil as soon as one model has no overlapping segment (lonely anchor).
func findOverlappingMatches(anchor labeledSeg, models []string, modelSegs map[string][]labeledSeg) map[string][]labeledSeg {
	found := make(map[string][]labeledSeg, len(models)-1)
	for _, name := range models[1:] {
		hits := found[name]
		for _, other := range modelSegs[name] {
			if overlaps(anchor.seg, other.seg) {
				hits = append(hits, other)
			}
		}
		if len(hits) == 0 {
			return nil
		}
		found[name] = hits
	}
	return found
}

// buildComparisonGroup returns the anchor followed by the first overlapping
// match of each remaining model, in the order the models are listed.
func buildComparisonGroup(anchor labeledSeg, models []string, matches map[string][]labeledSeg) []labeledSeg {
	group := make([]labeledSeg, 0, len(models))
	group = append(group, anchor)
	for _, name := range models[1:] {
		firstMatch := matches[name][0]
		group = append(group, firstMatch)
	}
	return group
}

// checkGroupAnomaly compares every member of the group against the first one.
// A species or call-type difference anywhere in the group yields a
// "label_mismatch" anomaly. Otherwise a certainty difference yields a
// "certainty_mismatch". It returns nil when the whole group agrees.
func checkGroupAnomaly(group []labeledSeg, path string, models []string) *Anomaly {
	ref := group[0].label
	certaintyDiffers := false
	for _, other := range group[1:] {
		if other.label.Species != ref.Species || other.label.CallType != ref.CallType {
			// Label disagreement takes priority over any certainty difference.
			return &Anomaly{File: path, Type: "label_mismatch", Segments: buildAnomalySegs(group, models)}
		}
		if other.label.Certainty != ref.Certainty {
			certaintyDiffers = true
		}
	}
	if certaintyDiffers {
		return &Anomaly{File: path, Type: "certainty_mismatch", Segments: buildAnomalySegs(group, models)}
	}
	return nil
}

// buildAnomalySegs converts a comparison group into its reportable form.
// Entry i is attributed to models[i], so group and models must be index-aligned.
func buildAnomalySegs(group []labeledSeg, models []string) []AnomalySegment {
	out := make([]AnomalySegment, 0, len(group))
	for i := range group {
		seg, label := group[i].seg, group[i].label
		out = append(out, AnomalySegment{
			Model:     models[i],
			Start:     seg.StartTime,
			End:       seg.EndTime,
			Species:   label.Species,
			CallType:  label.CallType,
			Certainty: label.Certainty,
		})
	}
	return out
}

// overlaps reports whether the two segments share a time span of non-zero
// length. Segments that merely touch at an endpoint do not overlap.
func overlaps(a, b *utils.Segment) bool {
	aStartsBeforeBEnds := a.StartTime < b.EndTime
	bStartsBeforeAEnds := b.StartTime < a.EndTime
	return aStartsBeforeBEnds && bStartsBeforeAEnds
}
file addition: calls_clip_labels_test.go (----------)

[0.67281]

package calls

import (
"encoding/csv"
"os"
"path/filepath"
"strings"
"testing"

"skraak/utils"
)

// --- test helpers (test file only) ---

// writeDataFile writes df to dir/name and fails the test immediately if the
// write returns an error.
func writeDataFile(t *testing.T, dir, name string, df *utils.DataFile) {
	t.Helper()
	target := filepath.Join(dir, name)
	err := df.Write(target)
	if err != nil {
		t.Fatalf("write .data file %s: %v", name, err)
	}
}

func writeMapping(t *testing.T, dir, json string) {
t.Helper()
if err := os.WriteFile(filepath.Join(dir, "mapping.json"), []byte(json), 0644); err != nil {
t.Fatalf("write mapping.json: %v", err)
}
}

// parseCSV reads the output CSV, returning header and rows.
func parseCSV(t *testing.T, path string) ([]string, [][]string) {
t.Helper()
f, err := os.Open(path)
if err != nil {
t.Fatalf("open CSV %s: %v", path, err)
}
defer f.Close()
r := csv.NewReader(f)
header, err := r.Read()
if err != nil {
t.Fatalf("read header: %v", err)
}
rows, err := r.ReadAll()
if err != nil {
t.Fatalf("read rows: %v", err)
}
return header, rows
}

// clipLabels runs CallsClipLabels against dir with the suite's default
// settings (5s clips, no overlap, 0.25s minimum label overlap, "full" final
// clip). mapping.json and clip_labels.csv are both placed inside dir. Each
// extra func may tweak the input before the call. An error fails the test.
func clipLabels(t *testing.T, dir string, extra ...func(*CallsClipLabelsInput)) CallsClipLabelsOutput {
	t.Helper()
	in := CallsClipLabelsInput{
		Folder:          dir,
		MappingPath:     filepath.Join(dir, "mapping.json"),
		OutputPath:      filepath.Join(dir, "clip_labels.csv"),
		ClipDuration:    5,
		ClipOverlap:     0,
		MinLabelOverlap: 0.25,
		FinalClip:       "full",
	}
	for i := range extra {
		extra[i](&in)
	}
	result, err := CallsClipLabels(in)
	if err != nil {
		t.Fatalf("CallsClipLabels: %v", err)
	}
	return result
}

// --- tests ---

func TestClipLabels_RealClassTrue(t *testing.T) {
dir := t.TempDir()
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 20},
Segments: []*utils.Segment{
{
StartTime: 3, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

out := clipLabels(t, dir)
header, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))

// Header: file, start_time, end_time, Kiwi
if len(header) != 4 || header[3] != "Kiwi" {
t.Fatalf("header = %v, want [..., Kiwi]", header)
}

// Clip 0-5 overlaps segment 3-8 by 2s ≥ 0.25 → Kiwi=True
// Clip 5-10 overlaps segment 3-8 by 3s ≥ 0.25 → Kiwi=True
// Clip 10-15, 15-20 → Kiwi=False
kiwiCol := 3
for i, row := range rows {
switch row[1] {
case "0.0", "5.0":
if row[kiwiCol] != "True" {
t.Errorf("row %d (start=%s): Kiwi=%s, want True", i, row[1], row[kiwiCol])
}
case "10.0", "15.0":
if row[kiwiCol] != "False" {
t.Errorf("row %d (start=%s): Kiwi=%s, want False", i, row[1], row[kiwiCol])
}
}
}
if out.PerClassTrueCount["Kiwi"] != 2 {
t.Errorf("PerClassTrueCount[Kiwi] = %d, want 2", out.PerClassTrueCount["Kiwi"])
}
}

func TestClipLabels_GapClipsAllFalse(t *testing.T) {
dir := t.TempDir()
// 15s file, Kiwi segment 0-5 only → clips 5-10 and 10-15 are gaps
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 15},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

out := clipLabels(t, dir)
if out.ClipsAllFalseGap != 2 {
t.Errorf("ClipsAllFalseGap = %d, want 2", out.ClipsAllFalseGap)
}
if out.PerClassTrueCount["Kiwi"] != 1 {
t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
}
if out.RowsWritten != 3 {
t.Errorf("RowsWritten = %d, want 3", out.RowsWritten)
}
}

// TestClipLabels_NegativeOverridesPositive checks that a clip overlapped by
// both a real class and a __NEGATIVE__ mapping is written with all class
// columns False, while a clip overlapped only by the real class stays True.
func TestClipLabels_NegativeOverridesPositive(t *testing.T) {
	dir := t.TempDir()
	// Kiwi segment 0-8, Not segment 0-4 → clip 0-5 overlaps both → __NEGATIVE__ wins
	// Clip 5-10 overlaps only Kiwi (3s) → True
	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 10},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
			{
				StartTime: 0, EndTime: 4, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Not", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)

	out := clipLabels(t, dir)
	if out.ClipsNegative != 1 {
		t.Errorf("ClipsNegative = %d, want 1", out.ClipsNegative)
	}

	_, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	// Fail with a message rather than an index-out-of-range panic if fewer
	// rows (or narrower rows) than expected were written.
	if len(rows) < 2 {
		t.Fatalf("got %d CSV rows, want at least 2", len(rows))
	}
	for i := range rows[:2] {
		if len(rows[i]) < 4 {
			t.Fatalf("row %d has %d columns, want at least 4", i, len(rows[i]))
		}
	}
	// Clip 0-5: negative hit → all-False (Not overlaps 0-4 by 4s)
	if rows[0][3] != "False" {
		t.Errorf("clip 0-5 Kiwi = %s, want False (overridden by __NEGATIVE__)", rows[0][3])
	}
	// Clip 5-10: only Kiwi overlaps (3s) → True
	if rows[1][3] != "True" {
		t.Errorf("clip 5-10 Kiwi = %s, want True", rows[1][3])
	}
}

func TestClipLabels_IgnoreExcludesClip(t *testing.T) {
dir := t.TempDir()
// Don't Know segment 0-5, Kiwi segment 6-10
// Clip 0-5 overlaps __IGNORE__ → excluded
// Clip 5-10 overlaps Kiwi → emitted with True
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 15},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Don't Know", Certainty: 0, Filter: "f1"}},
},
{
StartTime: 6, EndTime: 10, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Don't Know":{"species":"__IGNORE__"}}`)

out := clipLabels(t, dir)
if out.ClipsIgnored != 1 {
t.Errorf("ClipsIgnored = %d, want 1", out.ClipsIgnored)
}
if out.SegmentsIgnored != 1 {
t.Errorf("SegmentsIgnored = %d, want 1", out.SegmentsIgnored)
}
// Only 2 rows: clip 5-10 (Kiwi=True) and clip 10-15 (gap)
if out.RowsWritten != 2 {
t.Errorf("RowsWritten = %d, want 2", out.RowsWritten)
}
}

func TestClipLabels_FilterRestrictsLabels(t *testing.T) {
dir := t.TempDir()
// Same time range, two filters. Only "wanted" should contribute.
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 10},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{
{Species: "Kiwi", Certainty: 100, Filter: "wanted"},
{Species: "Not", Certainty: 100, Filter: "unwanted"},
},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)

out := clipLabels(t, dir, func(in *CallsClipLabelsInput) { in.Filter = "wanted" })
// Only Kiwi from "wanted" filter → clip 0-5 should be Kiwi=True
// Not from "unwanted" filter should be ignored → no __NEGATIVE__ override
if out.ClipsNegative != 0 {
t.Errorf("ClipsNegative = %d, want 0 (Not filter excluded)", out.ClipsNegative)
}
if out.PerClassTrueCount["Kiwi"] != 1 {
t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
}
}

func TestClipLabels_MappingCoverageError(t *testing.T) {
dir := t.TempDir()
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 10},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Mystery", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

input := CallsClipLabelsInput{
Folder: dir,
MappingPath: filepath.Join(dir, "mapping.json"),
OutputPath: filepath.Join(dir, "clip_labels.csv"),
ClipDuration: 5,
ClipOverlap: 0,
MinLabelOverlap: 0.25,
FinalClip: "full",
}
_, err := CallsClipLabels(input)
if err == nil {
t.Fatal("expected error for missing species in mapping")
}
if !strings.Contains(err.Error(), "Mystery") {
t.Errorf("error should mention missing species, got: %v", err)
}
}

// TestClipLabels_AppendMode checks that running a second time with the same
// folder and the same output file is rejected as a duplicate.
func TestClipLabels_AppendMode(t *testing.T) {
	dir := t.TempDir()
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

	// Single 5s recording, so the first run writes exactly one row.
	fixture := &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 5},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	}
	writeDataFile(t, dir, "a.wav.data", fixture)

	first := clipLabels(t, dir)
	if first.RowsWritten != 1 {
		t.Fatalf("first run: RowsWritten = %d, want 1", first.RowsWritten)
	}

	// Second run: same folder, same output CSV. The clip produced is already
	// present in the existing file, so the call must fail with a duplicate error.
	again := CallsClipLabelsInput{
		Folder:          dir,
		MappingPath:     filepath.Join(dir, "mapping.json"),
		OutputPath:      filepath.Join(dir, "clip_labels.csv"),
		ClipDuration:    5,
		ClipOverlap:     0,
		MinLabelOverlap: 0.25,
		FinalClip:       "full",
	}
	_, err := CallsClipLabels(again)
	if err == nil {
		t.Fatal("expected duplicate error on second run with same folder")
	}
	if msg := err.Error(); !strings.Contains(msg, "duplicate") {
		t.Errorf("error should mention duplicate, got: %v", err)
	}
}

// TestClipLabels_MultipleFiles checks that every .data file in the folder is
// processed and that rows from each file appear in the CSV.
func TestClipLabels_MultipleFiles(t *testing.T) {
	dir := t.TempDir()
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

	// Both recordings carry the same 0-5 Kiwi segment; only the duration differs.
	recording := func(duration float64) *utils.DataFile {
		return &utils.DataFile{
			Meta: &utils.DataMeta{Duration: duration},
			Segments: []*utils.Segment{
				{
					StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
					Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
				},
			},
		}
	}
	writeDataFile(t, dir, "a.wav.data", recording(10))
	writeDataFile(t, dir, "b.wav.data", recording(5))

	out := clipLabels(t, dir)
	if got := out.DataFilesParsed; got != 2 {
		t.Errorf("DataFilesParsed = %d, want 2", got)
	}
	// a yields clips 0-5 and 5-10, b yields clip 0-5: three rows in total.
	if got := out.RowsWritten; got != 3 {
		t.Errorf("RowsWritten = %d, want 3", got)
	}

	_, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	perFile := make(map[string]int)
	for i := range rows {
		perFile[rows[i][0]]++
	}
	if len(perFile) != 2 {
		t.Errorf("expected 2 distinct files in CSV, got %d", len(perFile))
	}
}
file addition: calls_clip_labels.go (----------)

[0.67281]

package calls

import (
"encoding/csv"
"fmt"
"io"
"os"
"path/filepath"
"slices"
"sort"
"strconv"
"strings"

"skraak/utils"
)

// CallsClipLabelsInput configures the clip-labels exporter.
type CallsClipLabelsInput struct {
// Folder is the directory scanned for .data files.
Folder string `json:"folder"`
// MappingPath is the mapping file loaded via utils.LoadMappingFile.
MappingPath string `json:"mapping"`
// Filter, when non-empty, restricts processing to labels whose Filter equals it.
Filter string `json:"filter,omitempty"`
// OutputPath is the CSV to create, or to append to when it already has content.
OutputPath string `json:"output"`
// ClipDuration must be > 0.
ClipDuration float64 `json:"clip_duration"`
// ClipOverlap must be in [0, ClipDuration).
ClipOverlap float64 `json:"clip_overlap"`
// MinLabelOverlap must be > 0; a segment must overlap a clip by at least
// this much to affect it.
MinLabelOverlap float64 `json:"min_label_overlap"`
// FinalClip is parsed by utils.ParseFinalClipMode.
FinalClip string `json:"final_clip"`
}

// CallsClipLabelsOutput summarises a run.
type CallsClipLabelsOutput struct {
Folder string `json:"folder"` // echoed from the input
OutputPath string `json:"output"` // echoed from the input
Filter string `json:"filter,omitempty"` // echoed from the input
Classes []string `json:"classes"` // real classes from the mapping; sorted on success
DataFilesParsed int `json:"data_files_parsed"` // number of .data files parsed
ClipsNegative int `json:"clips_negative"` // emitted, all-False because of __NEGATIVE__
ClipsIgnored int `json:"clips_ignored"` // excluded from output because of __IGNORE__ overlap
SegmentsIgnored int `json:"segments_ignored"` // segments whose species maps to __IGNORE__
ClipsAllFalseGap int `json:"clips_all_false_gap"` // emitted, all-False because no overlap
PerClassTrueCount map[string]int `json:"per_class_true_count"` // per class, clips written with that column True
AppendedToFile bool `json:"appended_to_file"` // true when a non-empty output file already existed
ExistingRowsFound int `json:"existing_rows_found"` // distinct row keys read from the existing file
RowsWritten int `json:"rows_written"` // rows written by this run
}

// resolvedSeg is a segment that has been classified by the mapping and is
// ready for overlap-checking against clip windows. One is produced per
// accepted label, so a segment with several labels can appear more than once.
type resolvedSeg struct {
start, end float64 // copied from the segment's StartTime and EndTime
kind utils.MappingKind
classIdx int // valid only when kind == utils.MappingReal
}

// clipDisposition describes the outcome for a single clip window.
// classifyClip resolves competing hits in the order ignore, negative,
// labelled, gap.
type clipDisposition int

const (
dispoLabelled clipDisposition = iota // at least one class column is True
dispoNegative // __NEGATIVE__ hit, all class columns False
dispoGap // no segment overlaps, all class columns False
dispoIgnored // __IGNORE__ hit, clip excluded from output
)

// clipLabelsRow is one row of the output CSV.
type clipLabelsRow struct {
file string // path written in the first column
start float64 // clip window start, rendered with formatTime
end float64 // clip window end, rendered with formatTime
flags []bool // one entry per class column; written as "True"/"False"
}

// rowKey is used for duplicate detection.
// start and end are kept as text so that keys read back from an existing CSV
// compare equal to keys built from new rows via formatTime.
type rowKey struct {
file string
start string
end string
}

// CallsClipLabels reads .data files from a single folder and writes a CSV in
// OpenSoundScape's clip_labels format: one row per clip per file, with one
// True/False column per class in the mapping.
//
// Mirrors BoxedAnnotations.clip_labels(): every clip window is emitted; a
// column is True when any annotation of that class overlaps the window by
// ≥ min_label_overlap seconds. Sentinel mappings (__NEGATIVE__, __IGNORE__)
// get no column and contribute no labels.

// parsedClipFile holds a parsed .data file for clip-labels processing.
type parsedClipFile struct {
path string // path of the .data file as returned by utils.FindDataFiles
df *utils.DataFile // parsed contents; Meta is non-nil with a positive Duration
}

// validateClipLabelsInput checks the numeric settings and parses FinalClip.
// Checks run in a fixed order (final-clip mode, clip duration, clip overlap,
// minimum label overlap) and the first failure is returned. On success the
// parsed final-clip mode is returned.
func validateClipLabelsInput(input CallsClipLabelsInput) (utils.FinalClipMode, error) {
	mode, err := utils.ParseFinalClipMode(input.FinalClip)
	switch {
	case err != nil:
		return 0, err
	case input.ClipDuration <= 0:
		return 0, fmt.Errorf("--clip-duration must be > 0, got %v", input.ClipDuration)
	case input.ClipOverlap < 0 || input.ClipOverlap >= input.ClipDuration:
		return 0, fmt.Errorf("--clip-overlap must be in [0, clip-duration), got %v", input.ClipOverlap)
	case input.MinLabelOverlap <= 0:
		return 0, fmt.Errorf("--min-label-overlap must be > 0, got %v", input.MinLabelOverlap)
	}
	return mode, nil
}

// parseClipLabelsDataFiles locates and parses every .data file in folder.
//
// It fails if the folder yields no .data files, if any file cannot be parsed,
// or if any file lacks a positive Duration. While parsing it collects the
// species of every label that passes filter (all labels when filter is
// empty), and fails if the mapping does not cover all of them.
func parseClipLabelsDataFiles(folder, filter string, mapping utils.MappingFile) ([]parsedClipFile, error) {
	paths, err := utils.FindDataFiles(folder)
	switch {
	case err != nil:
		return nil, fmt.Errorf("scan folder %s: %w", folder, err)
	case len(paths) == 0:
		return nil, fmt.Errorf("no .data files found in %s", folder)
	}

	seen := make(map[string]bool)
	files := make([]parsedClipFile, 0, len(paths))
	for _, dataPath := range paths {
		df, parseErr := utils.ParseDataFile(dataPath)
		if parseErr != nil {
			return nil, fmt.Errorf("parse %s: %w", dataPath, parseErr)
		}
		if df.Meta == nil || df.Meta.Duration <= 0 {
			return nil, fmt.Errorf("missing or non-positive Duration in %s (cannot generate clips)", dataPath)
		}
		for _, seg := range df.Segments {
			for _, lbl := range seg.Labels {
				if filter == "" || lbl.Filter == filter {
					seen[lbl.Species] = true
				}
			}
		}
		files = append(files, parsedClipFile{path: dataPath, df: df})
	}

	missing := mapping.ValidateCoversSpecies(seen)
	if len(missing) == 0 {
		return files, nil
	}
	return nil, fmt.Errorf("mapping.json is missing entries for species: %s\n(run /data-mapping to regenerate)", strings.Join(missing, ", "))
}

// dedupClipLabelsRows returns an error for the first new row whose
// (file, start, end) key is already a key of existing or was already seen
// among the earlier new rows. It returns nil when every row is unique.
func dedupClipLabelsRows(rows []clipLabelsRow, existing map[rowKey]bool) error {
	fresh := make(map[rowKey]bool, len(rows))
	for i := range rows {
		key := rowKey{
			file:  rows[i].file,
			start: formatTime(rows[i].start),
			end:   formatTime(rows[i].end),
		}
		_, inFile := existing[key]
		if inFile || fresh[key] {
			return fmt.Errorf("duplicate clip detected: file=%s start=%s end=%s", key.file, key.start, key.end)
		}
		fresh[key] = true
	}
	return nil
}

// CallsClipLabels reads .data files from a single folder and writes a CSV in
// OpenSoundScape's clip_labels format: one row per clip per file, with one
// True/False column per class in the mapping.
//
// Every clip window is emitted unless it overlaps an __IGNORE__ segment; a
// class column is True when a segment of that class overlaps the window by at
// least MinLabelOverlap seconds. Sentinel mappings (__NEGATIVE__, __IGNORE__)
// get no column.
//
// If OutputPath already has content, its header must equal the new header and
// the new rows are appended; a row whose (file, start, end) is already present
// makes the call fail before anything is written. The returned output is
// partially populated when an error is returned.
func CallsClipLabels(input CallsClipLabelsInput) (CallsClipLabelsOutput, error) {
	out := CallsClipLabelsOutput{
		Folder:            input.Folder,
		OutputPath:        input.OutputPath,
		PerClassTrueCount: map[string]int{},
	}

	finalClipMode, err := validateClipLabelsInput(input)
	if err != nil {
		return out, err
	}

	mapping, err := utils.LoadMappingFile(input.MappingPath)
	if err != nil {
		return out, fmt.Errorf("load mapping %s: %w", input.MappingPath, err)
	}

	classes := mapping.Classes()
	if len(classes) == 0 {
		return out, fmt.Errorf("mapping.json has no real (non-sentinel) classes")
	}
	out.Classes = classes
	out.Filter = input.Filter
	// classIdx maps a class name to its column position after the three key columns.
	classIdx := map[string]int{}
	for i, c := range classes {
		classIdx[c] = i
	}

	parsed, err := parseClipLabelsDataFiles(input.Folder, input.Filter, mapping)
	if err != nil {
		return out, err
	}
	out.DataFilesParsed = len(parsed)

	expectedHeader := append([]string{"file", "start_time", "end_time"}, classes...)
	existing, appendMode, err := loadExistingRows(input.OutputPath, expectedHeader)
	if err != nil {
		return out, err
	}
	out.AppendedToFile = appendMode
	out.ExistingRowsFound = len(existing)

	cwd, err := os.Getwd()
	if err != nil {
		return out, fmt.Errorf("getwd: %w", err)
	}
	folderAbs, err := filepath.Abs(input.Folder)
	if err != nil {
		return out, fmt.Errorf("abs %s: %w", input.Folder, err)
	}

	rows := make([]clipLabelsRow, 0, 1024)
	for _, pf := range parsed {
		fileRows, err := processClipLabelsFile(pf.path, pf.df, mapping, classIdx, classes, input, finalClipMode, cwd, folderAbs, &out)
		if err != nil {
			return out, err
		}
		rows = append(rows, fileRows...)
	}

	// Check duplicates before touching the output file so a failed run leaves it unchanged.
	if err := dedupClipLabelsRows(rows, existing); err != nil {
		return out, err
	}

	if err := writeRows(input.OutputPath, expectedHeader, rows, appendMode); err != nil {
		return out, err
	}
	out.RowsWritten = len(rows)

	// Report classes sorted, but sort a copy: sorting in place would reorder
	// the slice handed out by mapping.Classes().
	sorted := slices.Clone(classes)
	sort.Strings(sorted)
	out.Classes = sorted
	return out, nil
}

// processClipLabelsFile produces the clip-labels rows for one parsed .data
// file: it generates the clip windows for the file's duration, resolves the
// file's segments against the mapping, and labels each window. It returns no
// rows when no windows are generated. Counters on out are updated as a side
// effect.
func processClipLabelsFile(
	path string,
	df *utils.DataFile,
	mapping utils.MappingFile,
	classIdx map[string]int,
	classes []string,
	input CallsClipLabelsInput,
	finalClipMode utils.FinalClipMode,
	cwd, folderAbs string,
	out *CallsClipLabelsOutput,
) ([]clipLabelsRow, error) {
	// NOTE(review): the meaning of the trailing literal 10 is defined by
	// utils.GenerateClipTimes and is not visible here.
	clipWindows, genErr := utils.GenerateClipTimes(df.Meta.Duration, input.ClipDuration, input.ClipOverlap, finalClipMode, 10)
	if genErr != nil {
		return nil, fmt.Errorf("generate clip windows for %s: %w", path, genErr)
	}
	if len(clipWindows) == 0 {
		return nil, nil
	}

	resolved := resolveSegments(df.Segments, input.Filter, input.MinLabelOverlap, mapping, classIdx, out)

	wavPath, relErr := computeWavRelPath(path, cwd, folderAbs)
	if relErr != nil {
		return nil, relErr
	}

	rows := labelClipWindows(clipWindows, resolved, wavPath, classes, input.MinLabelOverlap, out)
	return rows, nil
}

// resolveSegments turns raw segments into resolvedSeg entries, one per
// accepted label. Segments shorter than minLabelOverlap are dropped, as are
// labels that fail the filter, are unknown to the mapping, or map to a real
// class that has no column. Each __IGNORE__ entry increments
// out.SegmentsIgnored.
func resolveSegments(
	segments []*utils.Segment,
	filter string,
	minLabelOverlap float64,
	mapping utils.MappingFile,
	classIdx map[string]int,
	out *CallsClipLabelsOutput,
) []resolvedSeg {
	resolved := make([]resolvedSeg, 0, len(segments))
	for _, seg := range segments {
		// A segment shorter than the threshold can never overlap a clip by enough.
		if seg.EndTime-seg.StartTime < minLabelOverlap {
			continue
		}
		for _, lbl := range seg.Labels {
			if filter != "" && lbl.Filter != filter {
				continue
			}
			canon, kind, ok := mapping.Classify(lbl.Species)
			if !ok {
				continue
			}

			entry := resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind}
			switch kind {
			case utils.MappingReal:
				idx, known := classIdx[canon]
				if !known {
					continue
				}
				entry.classIdx = idx
			case utils.MappingIgn:
				out.SegmentsIgnored++
			case utils.MappingNeg:
				// Kept as-is; no column index and no counter.
			default:
				continue
			}
			resolved = append(resolved, entry)
		}
	}
	return resolved
}

// computeWavRelPath derives the WAV path for a .data file: the file's base
// name with the ".data" suffix removed, placed in folderAbs and expressed
// relative to cwd. If no relative form exists the absolute path is used.
// Relative results are prefixed with "./" unless they already start with it,
// to match the OPSO / pandas convention. The returned error is always nil.
func computeWavRelPath(dataPath, cwd, folderAbs string) (string, error) {
	dotPrefix := "." + string(filepath.Separator)

	base := filepath.Base(dataPath)
	wavAbs := filepath.Join(folderAbs, strings.TrimSuffix(base, ".data"))

	rel, err := filepath.Rel(cwd, wavAbs)
	if err != nil {
		rel = wavAbs
	}

	needsPrefix := rel != "" && !filepath.IsAbs(rel) && !strings.HasPrefix(rel, dotPrefix)
	if needsPrefix {
		return dotPrefix + rel, nil
	}
	return rel, nil
}

// labelClipWindows classifies every clip window against the resolved segments
// and builds one output row per window that is not ignored. It updates the
// counters on out: ClipsIgnored, ClipsNegative, ClipsAllFalseGap and
// PerClassTrueCount.
func labelClipWindows(windows []utils.ClipWindow, segs []resolvedSeg, rel string, classes []string, minLabelOverlap float64, out *CallsClipLabelsOutput) []clipLabelsRow {
	var rows []clipLabelsRow
	for _, win := range windows {
		dispo, hits := classifyClip(win, segs, minLabelOverlap, len(classes))
		if dispo == dispoIgnored {
			// Ignored clips produce no row at all.
			out.ClipsIgnored++
			continue
		}

		flags := make([]bool, len(classes))
		switch dispo {
		case dispoLabelled:
			for i := range hits {
				if !hits[i] {
					continue
				}
				flags[i] = true
				out.PerClassTrueCount[classes[i]]++
			}
		case dispoGap:
			out.ClipsAllFalseGap++
		case dispoNegative:
			// Class hits are discarded: the row stays all-False.
			out.ClipsNegative++
		}

		rows = append(rows, clipLabelsRow{file: rel, start: win.Start, end: win.End, flags: flags})
	}
	return rows
}

// classifyClip decides the disposition of one clip window. Only segments that
// overlap the window by at least minLabelOverlap are considered.
// Priority: __IGNORE__ > __NEGATIVE__ > class labels > gap. The returned slice
// has one entry per class and is nil for an ignored clip.
func classifyClip(w utils.ClipWindow, segs []resolvedSeg, minLabelOverlap float64, nClasses int) (clipDisposition, []bool) {
	var sawIgnore, sawNegative, sawClass bool
	hits := make([]bool, nClasses)

	for i := range segs {
		s := &segs[i]
		if overlapSeconds(s.start, s.end, w.Start, w.End) < minLabelOverlap {
			continue
		}
		switch s.kind {
		case utils.MappingReal:
			hits[s.classIdx] = true
			sawClass = true
		case utils.MappingNeg:
			sawNegative = true
		case utils.MappingIgn:
			sawIgnore = true
		}
	}

	switch {
	case sawIgnore:
		return dispoIgnored, nil
	case sawNegative:
		return dispoNegative, hits
	case sawClass:
		return dispoLabelled, hits
	default:
		return dispoGap, hits
	}
}

// loadExistingRows inspects a previously written output CSV.
//
// A missing or zero-length file means a fresh write: it returns (nil, false, nil).
// Otherwise the header must equal expectedHeader and every row must have at
// least three fields; the (file, start, end) of each row is collected for
// duplicate detection and the second result is true (append mode).
func loadExistingRows(outputPath string, expectedHeader []string) (map[rowKey]bool, bool, error) {
	info, err := os.Stat(outputPath)
	switch {
	case err == nil:
		// File exists; inspected below.
	case os.IsNotExist(err):
		return nil, false, nil
	default:
		return nil, false, fmt.Errorf("stat %s: %w", outputPath, err)
	}
	if info.Size() == 0 {
		return nil, false, nil
	}

	file, err := os.Open(outputPath)
	if err != nil {
		return nil, false, fmt.Errorf("open existing %s: %w", outputPath, err)
	}
	defer func() { _ = file.Close() }()

	reader := csv.NewReader(file)
	// Accept rows of any width; only the first three fields are used.
	reader.FieldsPerRecord = -1

	header, err := reader.Read()
	if err != nil {
		return nil, false, fmt.Errorf("read header of existing %s: %w", outputPath, err)
	}
	if !slices.Equal(header, expectedHeader) {
		return nil, false, fmt.Errorf("column-set mismatch in existing %s\n existing: %s\n new: %s",
			outputPath, strings.Join(header, ","), strings.Join(expectedHeader, ","))
	}

	keys := make(map[rowKey]bool)
	for {
		record, readErr := reader.Read()
		if readErr == io.EOF {
			return keys, true, nil
		}
		if readErr != nil {
			return nil, false, fmt.Errorf("read row of existing %s: %w", outputPath, readErr)
		}
		if len(record) < 3 {
			return nil, false, fmt.Errorf("malformed row in existing %s: %v", outputPath, record)
		}
		keys[rowKey{file: record[0], start: record[1], end: record[2]}] = true
	}
}

// overlapSeconds returns the length of the intersection of the intervals
// [aStart, aEnd) and [bStart, bEnd), or 0 when they do not intersect or only
// touch at an endpoint.
func overlapSeconds(aStart, aEnd, bStart, bEnd float64) float64 {
	from, to := max(aStart, bStart), min(aEnd, bEnd)
	if to <= from {
		return 0
	}
	return to - from
}

// formatTime renders a float to match pandas' default float repr in to_csv:
// always at least one decimal place, no trailing zeros beyond what's needed.
// e.g. 5 → "5.0", 5.5 → "5.5", 3.5001250000 → "3.500125".
func formatTime(v float64) string {
s := strconv.FormatFloat(v, 'f', -1, 64)
if !strings.ContainsRune(s, '.') {
s += ".0"
}
return s
}

// writeRows writes the clip-labels rows to the CSV file at path.
//
// In append mode the file is opened for appending and no header is written.
// Otherwise the file is created or truncated and header is written first.
// Each row is file, start, end (rendered with formatTime), then one
// "True"/"False" cell per flag.
//
// An error from closing the file is returned when nothing failed earlier, so
// a write failure that only surfaces at close time is not lost.
func writeRows(path string, header []string, rows []clipLabelsRow, appendMode bool) (err error) {
	flag := os.O_CREATE | os.O_TRUNC | os.O_WRONLY
	if appendMode {
		flag = os.O_APPEND | os.O_WRONLY
	}
	f, err := os.OpenFile(path, flag, 0644)
	if err != nil {
		return fmt.Errorf("open %s for write: %w", path, err)
	}
	defer func() {
		if cerr := f.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("close %s: %w", path, cerr)
		}
	}()

	w := csv.NewWriter(f)
	if !appendMode {
		if err := w.Write(header); err != nil {
			return fmt.Errorf("write header: %w", err)
		}
	}

	// One buffer reused across rows and rebuilt per row, so a row's cell count
	// always matches its own flags rather than the first row's.
	var rec []string
	for _, r := range rows {
		rec = append(rec[:0], r.file, formatTime(r.start), formatTime(r.end))
		for _, set := range r.flags {
			if set {
				rec = append(rec, "True")
			} else {
				rec = append(rec, "False")
			}
		}
		if err := w.Write(rec); err != nil {
			return fmt.Errorf("write row: %w", err)
		}
	}
	w.Flush()
	return w.Error()
}
file addition: calls_clip_bench_test.go (----------)

[0.67281]

package calls

import (
"encoding/binary"
"math"
"os"
"testing"

"skraak/utils"
)

// benchWAV is the WAV fixture path handed to utils.ReadWAVSamples by the
// benchmarks in this file.
const benchWAV = "../../audio/20211028_211500.WAV"

// ==================== WAV I/O ====================

func BenchmarkReadWAV(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_, _, err := utils.ReadWAVSamples(benchWAV)
if err != nil {
b.Fatal(err)
}
}
}

func BenchmarkConvertToFloat64_16bit(b *testing.B) {
// Simulate 16-bit mono WAV data (same size as test file: 14.32M samples)
numSamples := 14320000
data := make([]byte, numSamples*2)
for i := range numSamples {
binary.LittleEndian.PutUint16(data[i*2:], uint16(i%65536))
}
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = convertToFloat64Bench(data, 16, 1)
}
}

// convertToFloat64Bench is a benchmark-only copy of the unexported
// convertToFloat64 in utils. It walks data one frame at a time (frame size
// is bitsPerSample/8 * channels), decodes the first two bytes of each frame
// as a little-endian int16, and scales it by 1/32768.
func convertToFloat64Bench(data []byte, bitsPerSample, channels int) []float64 {
	frameSize := (bitsPerSample / 8) * channels
	out := make([]float64, len(data)/frameSize)
	for i := range out {
		start := i * frameSize
		raw := binary.LittleEndian.Uint16(data[start : start+2])
		out[i] = float64(int16(raw)) / 32768.0
	}
	return out
}

// BenchmarkWriteWAV measures writing the 872-895s segment of the fixture to
// a temporary WAV file. Reading and slicing the fixture are excluded from the
// timing. Temp files live in a benchmark-owned directory that the testing
// package removes afterwards.
func BenchmarkWriteWAV(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatal(err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	b.Logf("segment samples=%d", len(segSamples))
	dir := b.TempDir()
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		f, err := os.CreateTemp(dir, "bench_*.wav")
		if err != nil {
			b.Fatal(err)
		}
		// NOTE(review): any result of WriteWAVFile is not inspected, exactly
		// as before; its signature is not visible from this file.
		utils.WriteWAVFile(f.Name(), segSamples, sr)
		f.Close()
		// Removed per iteration so files do not accumulate during long runs.
		os.Remove(f.Name())
	}
}

// ==================== Resample ====================

// BenchmarkResampleRate_48k measures utils.ResampleRate on the whole fixture
// with a source rate of 48000 and a target rate of 16000. It aborts if the
// fixture cannot be read rather than timing an empty input.
func BenchmarkResampleRate_48k(b *testing.B) {
	samples, _, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatal(err)
	}
	b.Logf("resampling %d samples 48000->16000", len(samples))
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		utils.ResampleRate(samples, 48000, 16000)
	}
}

// BenchmarkResampleRate_250k measures utils.ResampleRate on the whole fixture
// with a declared source rate of 250000 and a target rate of 16000. The same
// fixture samples are used; only the rate arguments differ. It aborts if the
// fixture cannot be read rather than timing an empty input.
func BenchmarkResampleRate_250k(b *testing.B) {
	samples, _, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatal(err)
	}
	b.Logf("resampling %d samples 250000->16000", len(samples))
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		utils.ResampleRate(samples, 250000, 16000)
	}
}

// ==================== Spectrogram pipeline ====================

// BenchmarkExtractSegment measures slicing the 872-895s segment out of the
// fixture's samples. It aborts with the read error if the fixture cannot be
// loaded, and fails if the extracted segment is empty.
func BenchmarkExtractSegment(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatal(err)
	}
	b.Logf("full file: %d samples, sr=%d", len(samples), sr)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		seg := utils.ExtractSegmentSamples(samples, sr, 872, 895)
		if len(seg) == 0 {
			b.Fatal("empty segment")
		}
	}
}

// BenchmarkPowerSpectrumFFT_512 measures one Hann-windowed 512-point frame
// through utils.PowerSpectrumFFT, reusing the frame, power and scratch
// buffers across iterations. It aborts if the fixture cannot be read or if
// the segment holds fewer than 512 samples.
func BenchmarkPowerSpectrumFFT_512(b *testing.B) {
	n := 512
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatal(err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	if len(segSamples) < n {
		// The windowing loop below indexes segSamples[0:n].
		b.Fatalf("segment has %d samples, need at least %d", len(segSamples), n)
	}
	frameData := make([]float64, n)
	power := make([]float64, n/2+1)
	scratch := make([]complex128, n)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		// Simulate the windowing step (Hann) + FFT
		for j := range n {
			frameData[j] = segSamples[j] * 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(j)/float64(n-1)))
		}
		utils.PowerSpectrumFFT(frameData, power, scratch)
	}
}

// BenchmarkSpectrogram_23s measures utils.GenerateSpectrogram on the 872-895s
// segment using utils.DefaultSpectrogramConfig(16000). It aborts if the
// fixture cannot be read, and fails on a nil spectrogram.
func BenchmarkSpectrogram_23s(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatal(err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	b.Logf("segment samples=%d, windowSize=%d, hopSize=%d", len(segSamples), cfg.WindowSize, cfg.HopSize)
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		if spect == nil {
			b.Fatal("nil spectrogram")
		}
	}
}

// BenchmarkSpectrogram_60s measures utils.GenerateSpectrogram on the first
// 60 seconds of the fixture using utils.DefaultSpectrogramConfig(16000). It
// aborts if the fixture cannot be read, and fails on a nil spectrogram.
func BenchmarkSpectrogram_60s(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatal(err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 0, 60)
	cfg := utils.DefaultSpectrogramConfig(16000)
	b.Logf("60s segment samples=%d", len(segSamples))
	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		if spect == nil {
			b.Fatal("nil spectrogram")
		}
	}
}

// ==================== Image creation & resize ====================

// BenchmarkCreateGrayscaleImage times utils.CreateGrayscaleImage on a
// spectrogram that is generated once, before the timer is reset.
func BenchmarkCreateGrayscaleImage(b *testing.B) {
	audio, rate, _ := utils.ReadWAVSamples(benchWAV)
	clip := utils.ExtractSegmentSamples(audio, rate, 872, 895)
	spectrogram := utils.GenerateSpectrogram(clip, utils.DefaultSpectrogramConfig(16000))
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		if utils.CreateGrayscaleImage(spectrogram) == nil {
			b.Fatal("nil image")
		}
	}
}

// BenchmarkCreateRGBImage times the colour path: utils.ApplyL4Colormap
// followed by utils.CreateRGBImage. Both calls are inside the timed loop; the
// spectrogram itself is prepared once up front.
func BenchmarkCreateRGBImage(b *testing.B) {
	audio, rate, _ := utils.ReadWAVSamples(benchWAV)
	clip := utils.ExtractSegmentSamples(audio, rate, 872, 895)
	spectrogram := utils.GenerateSpectrogram(clip, utils.DefaultSpectrogramConfig(16000))
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		mapped := utils.ApplyL4Colormap(spectrogram)
		if utils.CreateRGBImage(mapped) == nil {
			b.Fatal("nil image")
		}
	}
}

// BenchmarkApplyL4Colormap times utils.ApplyL4Colormap alone on a spectrogram
// prepared before the timer is reset.
func BenchmarkApplyL4Colormap(b *testing.B) {
	audio, rate, _ := utils.ReadWAVSamples(benchWAV)
	clip := utils.ExtractSegmentSamples(audio, rate, 872, 895)
	spectrogram := utils.GenerateSpectrogram(clip, utils.DefaultSpectrogramConfig(16000))
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		if utils.ApplyL4Colormap(spectrogram) == nil {
			b.Fatal("nil colormap")
		}
	}
}

// BenchmarkResizeGray224 times utils.ResizeImage scaling the grayscale
// spectrogram image to 224x224. The source image is built once, outside the
// timed region.
func BenchmarkResizeGray224(b *testing.B) {
	audio, rate, _ := utils.ReadWAVSamples(benchWAV)
	clip := utils.ExtractSegmentSamples(audio, rate, 872, 895)
	spectrogram := utils.GenerateSpectrogram(clip, utils.DefaultSpectrogramConfig(16000))
	source := utils.CreateGrayscaleImage(spectrogram)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		if utils.ResizeImage(source, 224, 224) == nil {
			b.Fatal("nil resize")
		}
	}
}

// BenchmarkResizeGray448 times utils.ResizeImage scaling the grayscale
// spectrogram image to 448x448. The source image is built once, outside the
// timed region.
func BenchmarkResizeGray448(b *testing.B) {
	audio, rate, _ := utils.ReadWAVSamples(benchWAV)
	clip := utils.ExtractSegmentSamples(audio, rate, 872, 895)
	spectrogram := utils.GenerateSpectrogram(clip, utils.DefaultSpectrogramConfig(16000))
	source := utils.CreateGrayscaleImage(spectrogram)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		if utils.ResizeImage(source, 448, 448) == nil {
			b.Fatal("nil resize")
		}
	}
}

// ==================== PNG write ====================

// BenchmarkWritePNG_224 times encoding the 224x224 grayscale image with
// utils.WritePNG into a fresh temporary file on every iteration. Temp-file
// creation, close and removal are part of the timed loop.
//
// Every I/O error stops the benchmark with a message; previously a failed
// os.CreateTemp left a nil *os.File that was then dereferenced.
func BenchmarkWritePNG_224(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("reading %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
	cfg := utils.DefaultSpectrogramConfig(16000)
	spect := utils.GenerateSpectrogram(segSamples, cfg)
	img := utils.CreateGrayscaleImage(spect)
	resized := utils.ResizeImage(img, 224, 224)
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		f, err := os.CreateTemp("", "bench_*.png")
		if err != nil {
			b.Fatalf("creating temp file: %v", err)
		}
		writeErr := utils.WritePNG(resized, f)
		closeErr := f.Close()
		// Best-effort cleanup: a leftover temp file must not abort the run.
		_ = os.Remove(f.Name())
		if writeErr != nil {
			b.Fatalf("writing PNG: %v", writeErr)
		}
		if closeErr != nil {
			b.Fatalf("closing PNG: %v", closeErr)
		}
	}
}

// ==================== Full pipeline ====================

// BenchmarkFullPipelineGray224 times the whole grayscale clip pipeline per
// iteration: extract the 872-895 segment, resample to 16000 when the source
// rate is higher, generate the spectrogram, build and resize the image to
// 224x224, then write a PNG and a WAV.
//
// The WAV is written to the temp path the PNG just vacated, as before. Every
// I/O error now stops the benchmark instead of being dropped.
func BenchmarkFullPipelineGray224(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("reading %s: %v", benchWAV, err)
	}
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
		outputSR := sr
		if sr > 16000 {
			segSamples = utils.ResampleRate(segSamples, sr, 16000)
			outputSR = 16000
		}
		cfg := utils.DefaultSpectrogramConfig(outputSR)
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		img := utils.CreateGrayscaleImage(spect)
		resized := utils.ResizeImage(img, 224, 224)

		f, err := os.CreateTemp("", "bench_*.png")
		if err != nil {
			b.Fatalf("creating temp file: %v", err)
		}
		path := f.Name()
		writeErr := utils.WritePNG(resized, f)
		closeErr := f.Close()
		_ = os.Remove(path) // best-effort cleanup
		if writeErr != nil {
			b.Fatalf("writing PNG: %v", writeErr)
		}
		if closeErr != nil {
			b.Fatalf("closing PNG: %v", closeErr)
		}

		if err := utils.WriteWAVFile(path, segSamples, outputSR); err != nil {
			b.Fatalf("writing WAV: %v", err)
		}
		_ = os.Remove(path) // best-effort cleanup
	}
}

// BenchmarkFullPipelineColor448 times the whole colour clip pipeline per
// iteration: extract the 872-895 segment, resample to 16000 when the source
// rate is higher, generate the spectrogram, apply the L4 colormap, build and
// resize the RGB image to 448x448, then write a PNG and a WAV.
//
// The WAV is written to the temp path the PNG just vacated, as before. Every
// I/O error now stops the benchmark instead of being dropped.
func BenchmarkFullPipelineColor448(b *testing.B) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		b.Fatalf("reading %s: %v", benchWAV, err)
	}
	b.ResetTimer()
	b.ReportAllocs()
	for range b.N {
		segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)
		outputSR := sr
		if sr > 16000 {
			segSamples = utils.ResampleRate(segSamples, sr, 16000)
			outputSR = 16000
		}
		cfg := utils.DefaultSpectrogramConfig(outputSR)
		spect := utils.GenerateSpectrogram(segSamples, cfg)
		colorData := utils.ApplyL4Colormap(spect)
		img := utils.CreateRGBImage(colorData)
		resized := utils.ResizeImage(img, 448, 448)

		f, err := os.CreateTemp("", "bench_*.png")
		if err != nil {
			b.Fatalf("creating temp file: %v", err)
		}
		path := f.Name()
		writeErr := utils.WritePNG(resized, f)
		closeErr := f.Close()
		_ = os.Remove(path) // best-effort cleanup
		if writeErr != nil {
			b.Fatalf("writing PNG: %v", writeErr)
		}
		if closeErr != nil {
			b.Fatalf("closing PNG: %v", closeErr)
		}

		if err := utils.WriteWAVFile(path, segSamples, outputSR); err != nil {
			b.Fatalf("writing WAV: %v", err)
		}
		_ = os.Remove(path) // best-effort cleanup
	}
}

// ==================== Data dimension report ====================

// TestPipelineDimensions logs the data sizes at each stage of the clip
// pipeline (samples, spectrogram bins x frames, image and resized image
// dimensions) for the 872-895 segment of the benchmark recording.
//
// It asserts nothing about the values, but it now fails with a message when
// the fixture cannot be read or a stage yields nothing, rather than panicking
// on spect[0] or a nil image.
func TestPipelineDimensions(t *testing.T) {
	samples, sr, err := utils.ReadWAVSamples(benchWAV)
	if err != nil {
		t.Fatalf("reading %s: %v", benchWAV, err)
	}
	segSamples := utils.ExtractSegmentSamples(samples, sr, 872, 895)

	t.Logf("Input: %d samples, sr=%d, segment=%d samples (%.1fs)",
		len(samples), sr, len(segSamples), float64(len(segSamples))/float64(sr))

	// Expected dimensions, derived from the config alone.
	cfg := utils.DefaultSpectrogramConfig(16000)
	numFrames := (len(segSamples)-cfg.WindowSize)/cfg.HopSize + 1
	numBins := cfg.WindowSize/2 + 1
	t.Logf("Spectrogram: %d freq bins x %d time frames = %d values",
		numBins, numFrames, numBins*numFrames)

	spect := utils.GenerateSpectrogram(segSamples, cfg)
	if len(spect) == 0 {
		t.Fatal("empty spectrogram")
	}
	t.Logf("Output: %d x %d (freq x time)", len(spect), len(spect[0]))

	img := utils.CreateGrayscaleImage(spect)
	if img == nil {
		t.Fatal("nil image")
	}
	t.Logf("Grayscale image: %dx%d pixels, %d bytes",
		img.Bounds().Dx(), img.Bounds().Dy(), img.Bounds().Dx()*img.Bounds().Dy())

	resized := utils.ResizeImage(img, 224, 224)
	if resized == nil {
		t.Fatal("nil resize (224)")
	}
	t.Logf("Resized 224: %dx%d", resized.Bounds().Dx(), resized.Bounds().Dy())

	resized448 := utils.ResizeImage(img, 448, 448)
	if resized448 == nil {
		t.Fatal("nil resize (448)")
	}
	t.Logf("Resized 448: %dx%d", resized448.Bounds().Dx(), resized448.Bounds().Dy())
}
file addition: calls_clip.go (----------)

[0.67281]

package calls

import (
"fmt"
"image"
"math"
"os"
"path/filepath"
"runtime"
"strings"
"sync"

"skraak/utils"
)

// CallsClipInput defines the input for the clip tool.
//
// One of File or Folder must be set, and Output and Prefix are required
// (see validateClipInput). The remaining fields narrow which segments are
// clipped and how the images are rendered.
type CallsClipInput struct {
// File is a single .data file to process; when set, Folder is not searched.
File string `json:"file"`
// Folder is searched for .data files when File is empty.
Folder string `json:"folder"`
// Output is the folder that receives the clips; it is created if missing.
Output string `json:"output"`
// Prefix is the leading component of every generated clip filename.
Prefix string `json:"prefix"`
// Filter, Species and Certainty are passed to Segment.SegmentMatchesFilters.
// Species is first split by utils.ParseSpeciesCallType into a species name
// and a call type.
Filter string `json:"filter"`
Species string `json:"species"`
Certainty int `json:"certainty"`
// Size is clamped by utils.ClampImageSize and used as both image width and height.
Size int `json:"size"`
// Color selects the L4-colormap RGB image; otherwise the image is grayscale.
Color bool `json:"color"`

// Night skips recordings for which IsNight reports SolarNight == false.
Night bool `json:"night"`
// Day skips recordings for which IsNight reports DiurnalActive == false.
Day bool `json:"day"`
// Location, when non-empty, is parsed by utils.ParseLocation into
// latitude, longitude and timezone for the day/night check.
Location string `json:"location,omitempty"`
}

// CallsClipOutput defines the output for the clip tool.
// All counters and lists are accumulated per .data file by accumulateFileResult.
type CallsClipOutput struct {
// FilesProcessed counts files that produced at least one clip file or reported no errors.
FilesProcessed int `json:"files_processed"`
// SegmentsClipped grows by the number of clip file paths returned for each file.
SegmentsClipped int `json:"segments_clipped"`
// NightSkipped receives the time-filter skip count when the Night flag is set.
NightSkipped int `json:"night_skipped,omitempty"`
// DaySkipped receives the time-filter skip count when the Night flag is not set.
DaySkipped int `json:"day_skipped,omitempty"`
// OutputFiles lists the paths of every clip file written.
OutputFiles []string `json:"output_files"`
// Errors collects human-readable validation and per-file/per-segment failures.
Errors []string `json:"errors,omitempty"`
}

// CallsClip reads .data files and writes audio/image clips for every segment
// that matches the requested filters.
//
// Steps, in order: validate required flags, split the species argument into
// species and call type, resolve the .data file list, create the output
// folder, clamp the image size, parse the optional location, then process the
// files (sequentially for one or two files, with workers otherwise).
//
// A non-nil error is returned only for setup failures; the same message is
// also appended to the returned output's Errors. Per-file failures are
// reported through Errors alone.
func CallsClip(input CallsClipInput) (CallsClipOutput, error) {
	var result CallsClipOutput

	if err := validateClipInput(&result, input); err != nil {
		return result, err
	}

	species, call := utils.ParseSpeciesCallType(input.Species)

	dataFiles, err := resolveClipFiles(&result, input)
	if err != nil {
		return result, err
	}

	// MkdirAll is a no-op when the folder already exists.
	if mkErr := os.MkdirAll(input.Output, 0755); mkErr != nil {
		result.Errors = append(result.Errors, fmt.Sprintf("failed to create output folder: %v", mkErr))
		return result, mkErr
	}

	size := utils.ClampImageSize(input.Size)

	// Without a location the coordinates stay zero and the timezone empty.
	var (
		lat, lng float64
		tz       string
	)
	if input.Location != "" {
		if lat, lng, tz, err = utils.ParseLocation(input.Location); err != nil {
			result.Errors = append(result.Errors, err.Error())
			return result, err
		}
	}

	// Small batches are not worth the worker setup.
	run := processFilesParallel
	if len(dataFiles) <= 2 {
		run = processFilesSequential
	}
	run(&result, dataFiles, input, species, call, size, lat, lng, tz)

	return result, nil
}

// validateClipInput checks the flags clip generation cannot run without:
// a source (--file or --folder), --output and --prefix. The first missing
// flag is recorded in output.Errors and returned as an error; later ones are
// not examined.
func validateClipInput(output *CallsClipOutput, input CallsClipInput) error {
	switch {
	case input.File == "" && input.Folder == "":
		output.Errors = append(output.Errors, "either --file or --folder is required")
		return fmt.Errorf("missing required flag: --file or --folder")
	case input.Output == "":
		output.Errors = append(output.Errors, "--output is required")
		return fmt.Errorf("missing required flag: --output")
	case input.Prefix == "":
		output.Errors = append(output.Errors, "--prefix is required")
		return fmt.Errorf("missing required flag: --prefix")
	}
	return nil
}

// resolveClipFiles decides which .data files to process. An explicit
// input.File wins and is returned as a one-element list without touching the
// filesystem; otherwise input.Folder is searched with utils.FindDataFiles.
// A search failure or an empty result is recorded in output.Errors and
// returned as an error.
func resolveClipFiles(output *CallsClipOutput, input CallsClipInput) ([]string, error) {
	if single := input.File; single != "" {
		return []string{single}, nil
	}

	found, err := utils.FindDataFiles(input.Folder)
	switch {
	case err != nil:
		output.Errors = append(output.Errors, fmt.Sprintf("failed to find .data files: %v", err))
		return nil, err
	case len(found) == 0:
		output.Errors = append(output.Errors, "no .data files found")
		return nil, fmt.Errorf("no .data files found")
	}
	return found, nil
}

// processFilesSequential runs processFile over each .data file in list order
// and folds every result into output as it goes.
func processFilesSequential(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
	for i := range filePaths {
		made, skipCount, problems := processFile(
			filePaths[i],
			input.Output, input.Prefix, input.Filter,
			speciesName, callType,
			input.Certainty, imgSize,
			input.Color, input.Night, input.Day,
			lat, lng, timezone,
		)
		accumulateFileResult(output, made, skipCount, problems, input.Night)
	}
}

// processFilesParallel runs processFile over the .data files with a pool of
// worker goroutines. The pool size is the smallest of the CPU count, 8 and
// the number of files.
//
// Only this goroutine touches output: workers send their results over a
// channel and the results are merged here, in completion order.
func processFilesParallel(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
	type outcome struct {
		clips   []string
		skipped int
		errs    []string
	}

	total := len(filePaths)

	// Both channels can hold every item, so neither side ever blocks on send.
	queue := make(chan string, total)
	done := make(chan outcome, total)

	for _, path := range filePaths {
		queue <- path
	}
	close(queue)

	var pool sync.WaitGroup
	for range min(runtime.NumCPU(), 8, total) {
		pool.Go(func() {
			for path := range queue {
				made, skipCount, problems := processFile(path, input.Output, input.Prefix, input.Filter, speciesName, callType, input.Certainty, imgSize, input.Color, input.Night, input.Day, lat, lng, timezone)
				done <- outcome{clips: made, skipped: skipCount, errs: problems}
			}
		})
	}

	// Close the result channel once every worker has drained the queue so the
	// merge loop below terminates.
	go func() {
		pool.Wait()
		close(done)
	}()

	for res := range done {
		accumulateFileResult(output, res.clips, res.skipped, res.errs, input.Night)
	}
}

// accumulateFileResult folds one file's results into the running output:
// clip paths and errors are appended, the skip count goes to NightSkipped
// when night is set and to DaySkipped otherwise, and FilesProcessed is bumped
// unless the file produced nothing but errors.
//
// NOTE(review): SegmentsClipped grows by len(clips), and clips holds output
// file paths. generateClip returns two paths (PNG + WAV) per segment, so this
// looks like a count of files rather than segments — confirm that is intended.
func accumulateFileResult(output *CallsClipOutput, clips []string, skipped int, errs []string, night bool) {
	output.OutputFiles = append(output.OutputFiles, clips...)
	output.Errors = append(output.Errors, errs...)
	output.SegmentsClipped += len(clips)

	counter := &output.DaySkipped
	if night {
		counter = &output.NightSkipped
	}
	*counter += skipped

	// A file counts as processed unless it yielded no clips and at least one error.
	onlyErrors := len(clips) == 0 && len(errs) != 0
	if !onlyErrors {
		output.FilesProcessed++
	}
}

// processFile handles one .data file end to end and returns the clip paths it
// wrote, the number of recordings skipped by the day/night filter (0 or 1),
// and any error messages.
//
// The WAV path is the .data path with the ".data" suffix removed. Work is
// ordered from cheapest to most expensive: parse the .data file, filter its
// segments, apply the optional day/night check, and only then read the audio.
func processFile(dataPath, outputDir, prefix, filter, speciesName, callType string, certainty, imgSize int, color, night, day bool, lat, lng float64, timezone string) ([]string, int, []string) {
	parsed, err := utils.ParseDataFile(dataPath)
	if err != nil {
		return nil, 0, []string{fmt.Sprintf("%s: failed to parse: %v", dataPath, err)}
	}

	// Derive the recording's path and its extension-less base name.
	wavPath := filepath.Clean(strings.TrimSuffix(dataPath, ".data"))
	stem := filepath.Base(wavPath)
	stem = strings.TrimSuffix(stem, filepath.Ext(stem))

	wanted := filterSegments(parsed.Segments, filter, speciesName, callType, certainty)
	if len(wanted) == 0 {
		return nil, 0, nil
	}

	if night || day {
		skip, filterErr := checkDayNightFilter(wavPath, night, day, lat, lng, timezone)
		switch {
		case skip:
			return nil, 1, nil
		case filterErr != nil:
			// The helper has already printed a warning; the file is dropped
			// without adding an error or a skip count.
			return nil, 0, nil
		}
	}

	// The audio is read once and shared by every matching segment.
	audio, rate, err := utils.ReadWAVSamples(wavPath)
	if err != nil {
		return nil, 0, []string{fmt.Sprintf("%s: failed to read WAV: %v", dataPath, err)}
	}

	made, problems := processSegments(wanted, dataPath, audio, rate, outputDir, prefix, stem, imgSize, color)
	return made, 0, problems
}

// filterSegments keeps, in their original order, the segments for which
// SegmentMatchesFilters accepts the given filter, species, call type and
// certainty. The result is nil when nothing matches.
func filterSegments(segments []*utils.Segment, filter, speciesName, callType string, certainty int) []*utils.Segment {
	var kept []*utils.Segment
	for i := range segments {
		candidate := segments[i]
		if !candidate.SegmentMatchesFilters(filter, speciesName, callType, certainty) {
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}

// checkDayNightFilter applies day/night filtering. Returns (skipped=true, nil) if the
// recording should be skipped, (false, nil) if it passes, or (false, err) on failure.
func checkDayNightFilter(wavPath string, night, day bool, lat, lng float64, timezone string) (bool, error) {
result, err := IsNight(IsNightInput{
FilePath: wavPath,
Lat: lat,
Lng: lng,
Timezone: timezone,
})
if err != nil {
fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
return false, err
}
if night && !result.SolarNight {
fmt.Fprintf(os.Stderr, "skipped (daytime): %s\n", wavPath)
return true, nil
}
if day && !result.DiurnalActive {
fmt.Fprintf(os.Stderr, "skipped (nighttime): %s\n", wavPath)
return true, nil
}
return false, nil
}

// processSegments writes a clip for each segment and returns the clip paths
// and the error messages. One or two segments are handled inline, in order;
// larger batches are delegated to processSegmentsParallel. A failing segment
// is reported and does not stop the others.
func processSegments(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
	if len(segments) > 2 {
		return processSegmentsParallel(segments, dataPath, samples, sampleRate, outputDir, prefix, basename, imgSize, color)
	}

	var made, problems []string
	for _, s := range segments {
		paths, err := generateClip(samples, sampleRate, outputDir, prefix, basename, s.StartTime, s.EndTime, imgSize, color)
		if err == nil {
			made = append(made, paths...)
			continue
		}
		problems = append(problems, fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, s.StartTime, s.EndTime, err))
	}
	return made, problems
}

// processSegmentsParallel writes clips for the segments with a pool of worker
// goroutines, one per CPU but never more than there are segments. Clip paths
// and error messages are collected on the calling goroutine in completion
// order, so their order is not tied to segment order.
func processSegmentsParallel(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
	type outcome struct {
		clips []string
		err   string
	}

	total := len(segments)

	// Both channels can hold every item, so neither side ever blocks on send.
	queue := make(chan *utils.Segment, total)
	done := make(chan outcome, total)

	for i := range segments {
		queue <- segments[i]
	}
	close(queue)

	var pool sync.WaitGroup
	for range min(runtime.NumCPU(), total) {
		pool.Go(func() {
			for s := range queue {
				paths, err := generateClip(samples, sampleRate, outputDir, prefix, basename, s.StartTime, s.EndTime, imgSize, color)
				if err == nil {
					done <- outcome{clips: paths}
					continue
				}
				done <- outcome{err: fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, s.StartTime, s.EndTime, err)}
			}
		})
	}

	// Close the result channel once all workers are finished so the
	// collection loop below terminates.
	go func() {
		pool.Wait()
		close(done)
	}()

	var made, problems []string
	for res := range done {
		if res.err == "" {
			made = append(made, res.clips...)
			continue
		}
		problems = append(problems, res.err)
	}
	return made, problems
}

// generateClip writes a spectrogram PNG and a WAV file for one segment and
// returns their paths as {png, wav}.
//
// Both files are named "<prefix>_<basename>_<start>_<end>" in outputDir, with
// start rounded down and end rounded up to whole numbers. Audio above
// utils.DefaultMaxSampleRate is resampled to that rate before the spectrogram
// and the WAV are produced. The image is imgSize x imgSize, coloured with the
// L4 colormap when color is set and grayscale otherwise.
//
// The PNG is created with O_EXCL, so an existing file is never overwritten.
// On any failure the function returns (nil, err); a PNG created by this call
// is removed again, so a failed segment does not leave a stray file behind
// that would make a retry fail with "file already exists".
func generateClip(samples []float64, sampleRate int, outputDir, prefix, basename string, startTime, endTime float64, imgSize int, color bool) ([]string, error) {
	// Integer bounds for the filename.
	startInt := int(math.Floor(startTime))
	endInt := int(math.Ceil(endTime))

	baseName := fmt.Sprintf("%s_%s_%d_%d", prefix, basename, startInt, endInt)
	pngPath := filepath.Join(outputDir, baseName+".png")
	wavPath := filepath.Join(outputDir, baseName+".wav")

	segSamples := utils.ExtractSegmentSamples(samples, sampleRate, startTime, endTime)
	if len(segSamples) == 0 {
		return nil, fmt.Errorf("no samples in segment")
	}

	// Cap the output sample rate at utils.DefaultMaxSampleRate.
	outputSampleRate := sampleRate
	if sampleRate > utils.DefaultMaxSampleRate {
		segSamples = utils.ResampleRate(segSamples, sampleRate, utils.DefaultMaxSampleRate)
		outputSampleRate = utils.DefaultMaxSampleRate
	}

	config := utils.DefaultSpectrogramConfig(outputSampleRate)
	spectrogram := utils.GenerateSpectrogram(segSamples, config)
	if spectrogram == nil {
		return nil, fmt.Errorf("failed to generate spectrogram")
	}

	// NOTE(review): if the utils image constructors return concrete pointer
	// types, a nil pointer stored in this interface would not compare equal
	// to nil below — confirm their return types.
	var img image.Image
	if color {
		colorData := utils.ApplyL4Colormap(spectrogram)
		img = utils.CreateRGBImage(colorData)
	} else {
		img = utils.CreateGrayscaleImage(spectrogram)
	}
	if img == nil {
		return nil, fmt.Errorf("failed to create image")
	}

	resized := utils.ResizeImage(img, imgSize, imgSize)

	// O_EXCL fails atomically if the file exists, so from here on the PNG at
	// pngPath is known to be ours and is safe to remove on failure.
	pngFile, err := os.OpenFile(pngPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		if os.IsExist(err) {
			return nil, fmt.Errorf("file already exists: %s", pngPath)
		}
		return nil, fmt.Errorf("failed to create PNG: %w", err)
	}
	if err := utils.WritePNG(resized, pngFile); err != nil {
		_ = pngFile.Close()
		_ = os.Remove(pngPath) // best-effort cleanup of the partial file
		return nil, fmt.Errorf("failed to write PNG: %w", err)
	}
	if err := pngFile.Close(); err != nil {
		_ = os.Remove(pngPath) // best-effort cleanup
		return nil, fmt.Errorf("failed to close PNG: %w", err)
	}

	// The WAV itself is left alone on failure: this function cannot tell
	// whether a file at wavPath existed before the call.
	if err := utils.WriteWAVFile(wavPath, segSamples, outputSampleRate); err != nil {
		_ = os.Remove(pngPath) // do not leave a PNG without its WAV
		return nil, fmt.Errorf("failed to write WAV: %w", err)
	}

	return []string{pngPath, wavPath}, nil
}
file addition: calls_classify_test.go (----------)

[0.67281]

package calls

import (
"testing"

"skraak/utils"
)

// NewClassifyState builds a ClassifyState for tests directly from in-memory
// data files. For each file it caches the segments that pass the configured
// filter, species, call type and certainty, and records the total count.
// When no filter or species is set and Certainty is negative, every segment
// is kept and the file's own segment slice is reused as the cache entry.
func NewClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile) *ClassifyState {
	unfiltered := config.Filter == "" && config.Species == "" && config.Certainty < 0

	perFile := make([][]*utils.Segment, len(dataFiles))
	count := 0
	for idx, file := range dataFiles {
		if unfiltered {
			perFile[idx] = file.Segments
		} else {
			for _, s := range file.Segments {
				if s.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
					perFile[idx] = append(perFile[idx], s)
				}
			}
		}
		count += len(perFile[idx])
	}

	return &ClassifyState{
		Config:       config,
		DataFiles:    dataFiles,
		filteredSegs: perFile,
		totalSegs:    count,
	}
}

func TestParseKeyBuffer(t *testing.T) {
bindings := []KeyBinding{
{Key: "k", Species: "Kiwi"},
{Key: "d", Species: "Kiwi", CallType: "Duet"},
{Key: "n", Species: "Don't Know"},
{Key: "p", Species: "Morepork"},
}

state := NewClassifyState(ClassifyConfig{Bindings: bindings, Certainty: -1}, nil)

tests := []struct {
key string
want *BindingResult
wantNil bool
}{
{"k", &BindingResult{Species: "Kiwi"}, false},
{"d", &BindingResult{Species: "Kiwi", CallType: "Duet"}, false},
{"n", &BindingResult{Species: "Don't Know"}, false},
{"p", &BindingResult{Species: "Morepork"}, false},
{"x", nil, true}, // unknown key
}

for _, tt := range tests {
got := state.ParseKeyBuffer(tt.key)
if tt.wantNil {
if got != nil {
t.Errorf("ParseKeyBuffer(%q) = %v, want nil", tt.key, got)
}
} else {
if got == nil {
t.Errorf("ParseKeyBuffer(%q) = nil, want %+v", tt.key, tt.want)
continue
}
if got.Species != tt.want.Species {
t.Errorf("ParseKeyBuffer(%q).Species = %q, want %q", tt.key, got.Species, tt.want.Species)
}
if got.CallType != tt.want.CallType {
t.Errorf("ParseKeyBuffer(%q).CallType = %q, want %q", tt.key, got.CallType, tt.want.CallType)
}
}
}
}

// TestApplyBinding applies three bindings in turn to a single labelled
// segment and checks the resulting label after each one:
//
//  1. Kiwi with no call type: species and certainty are updated, the old call
//     type is cleared, and the reviewer is recorded in the file metadata.
//  2. Kiwi/Duet: the call type is set.
//  3. Don't Know: the species is set and certainty drops to 0.
//
// Label-count failures stop the test immediately, so a missing label is
// reported as a failure rather than as an index-out-of-range panic.
func TestApplyBinding(t *testing.T) {
	bindings := []KeyBinding{
		{Key: "k", Species: "Kiwi"},
		{Key: "n", Species: "Don't Know"},
		{Key: "d", Species: "Kiwi", CallType: "Duet"},
	}

	df := &utils.DataFile{
		Meta: &utils.DataMeta{},
		Segments: []*utils.Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*utils.Label{
					{Species: "Unknown", Certainty: 50, Filter: "test-filter", CallType: "OldType"},
				},
			},
		},
	}

	state := NewClassifyState(ClassifyConfig{
		Filter:    "test-filter",
		Reviewer:  "David",
		Bindings:  bindings,
		Certainty: -1,
	}, []*utils.DataFile{df})

	// firstLabel re-reads the segment's labels on every call (they are not
	// cached across ApplyBinding calls) and aborts if there are none.
	firstLabel := func() *utils.Label {
		t.Helper()
		labels := df.Segments[0].Labels
		if len(labels) == 0 {
			t.Fatal("segment has no labels")
		}
		return labels[0]
	}

	// Apply "k" = Kiwi (no calltype, should remove existing calltype)
	state.ApplyBinding(&BindingResult{Species: "Kiwi"})

	if n := len(df.Segments[0].Labels); n != 1 {
		t.Fatalf("expected 1 label, got %d", n)
	}
	label := firstLabel()
	if label.Species != "Kiwi" {
		t.Errorf("expected Species=Kiwi, got %s", label.Species)
	}
	if label.Certainty != 100 {
		t.Errorf("expected Certainty=100, got %d", label.Certainty)
	}
	if label.CallType != "" {
		t.Errorf("expected CallType='', got %s (should be removed)", label.CallType)
	}
	if df.Meta.Reviewer != "David" {
		t.Errorf("expected Reviewer=David, got %s", df.Meta.Reviewer)
	}

	// Apply "d" = Kiwi/Duet (should set calltype)
	state.ApplyBinding(&BindingResult{Species: "Kiwi", CallType: "Duet"})

	if label = firstLabel(); label.CallType != "Duet" {
		t.Errorf("expected CallType=Duet, got %s", label.CallType)
	}

	// Apply "n" = Don't Know (certainty should be 0)
	state.ApplyBinding(&BindingResult{Species: "Don't Know"})

	label = firstLabel()
	if label.Species != "Don't Know" {
		t.Errorf("expected Species=Don't Know, got %s", label.Species)
	}
	if label.Certainty != 0 {
		t.Errorf("expected Certainty=0 for Don't Know, got %d", label.Certainty)
	}
}

func TestApplyBindingCallTypeRemoval(t *testing.T) {
bindings := []KeyBinding{
{Key: "k", Species: "Kiwi"}, // no calltype
}

df := &utils.DataFile{
Meta: &utils.DataMeta{},
Segments: []*utils.Segment{
{
StartTime: 10.0,
EndTime: 20.0,
Labels: []*utils.Label{
{Species: "Kiwi", Certainty: 100, Filter: "test-filter", CallType: "Male"},
},
},
},
}

state := NewClassifyState(ClassifyConfig{
Filter: "test-filter",
Reviewer: "David",
Bindings: bindings,
Certainty: -1,
}, []*utils.DataFile{df})

// Apply "k" = Kiwi (should remove Male calltype)
result := &BindingResult{Species: "Kiwi"}
state.ApplyBinding(result)

if df.Segments[0].Labels[0].CallType != "" {
t.Errorf("expected CallType='', got %s (should be removed)", df.Segments[0].Labels[0].CallType)
}
}

// TestConfirmLabelDontKnow checks that ConfirmLabel is a no-op on a
// "Don't Know" label with certainty 0: it returns false, leaves the label
// unchanged and does not mark the state dirty.
func TestConfirmLabelDontKnow(t *testing.T) {
	df := &utils.DataFile{
		Meta: &utils.DataMeta{},
		Segments: []*utils.Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*utils.Label{
					{Species: "Don't Know", Certainty: 0, Filter: "test-filter"},
				},
			},
		},
	}
	cfg := ClassifyConfig{Filter: "test-filter", Reviewer: "David", Certainty: -1}
	state := NewClassifyState(cfg, []*utils.DataFile{df})

	if confirmed := state.ConfirmLabel(); confirmed {
		t.Error("ConfirmLabel() should return false for Don't Know (certainty=0)")
	}

	after := df.Segments[0].Labels[0]
	if after.Species != "Don't Know" {
		t.Errorf("Species should remain Don't Know, got %s", after.Species)
	}
	if after.Certainty != 0 {
		t.Errorf("Certainty should remain 0, got %d", after.Certainty)
	}
	if state.Dirty {
		t.Error("State should not be dirty after confirming Don't Know")
	}
}
file addition: calls_classify_load_test.go (----------)

[0.67281]

package calls

import (
"os"
"path/filepath"
"testing"
)

// writeDataFileContent writes content verbatim to the file name inside dir
// (mode 0644) and fails the test immediately if the write does not succeed.
func writeDataFileContent(t *testing.T, dir, name, content string) {
	t.Helper()
	target := filepath.Join(dir, name)
	err := os.WriteFile(target, []byte(content), 0644)
	if err != nil {
		t.Fatal(err)
	}
}

// mustLoadDataFiles wraps LoadDataFiles for tests: it returns the loaded
// state, or fails the calling test immediately when loading returns an error.
func mustLoadDataFiles(t *testing.T, config ClassifyConfig) *ClassifyState {
	t.Helper()
	loaded, loadErr := LoadDataFiles(config)
	if loadErr == nil {
		return loaded
	}
	t.Fatal(loadErr)
	return nil
}

// assertFileSegCounts reports a test error when the number of loaded data
// files or the total segment count differs from the expected values. label
// prefixes both messages so failures can be traced to the calling case.
func assertFileSegCounts(t *testing.T, state *ClassifyState, wantFiles, wantSegs int, label string) {
	t.Helper()
	if gotFiles := len(state.DataFiles); gotFiles != wantFiles {
		t.Errorf("%s: expected %d files, got %d", label, wantFiles, gotFiles)
	}
	if gotSegs := state.TotalSegments(); gotSegs != wantSegs {
		t.Errorf("%s: expected %d segments total, got %d", label, wantSegs, gotSegs)
	}
}

// kiwiSeg is the raw content of a .data file holding one 0-10 segment
// labelled Kiwi with certainty 90.
const kiwiSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]]]`

// tomtitSeg is the raw content of a .data file holding one 0-10 segment
// labelled Tomtit with certainty 90.
const tomtitSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`

func TestLoadDataFilesFiltersFilesWithNoMatchingSegments(t *testing.T) {
tempDir := t.TempDir()

writeDataFileContent(t, tempDir, "file1.data", kiwiSeg)
writeDataFileContent(t, tempDir, "file2.data", tomtitSeg)
writeDataFileContent(t, tempDir, "file3.data", kiwiSeg)

t.Run("no_filter", func(t *testing.T) {
state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Certainty: -1})
assertFileSegCounts(t, state, 3, 3, "No filter")
})

t.Run("species_kiwi", func(t *testing.T) {
state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})
assertFileSegCounts(t, state, 2, 2, "Species=Kiwi")
})

t.Run("species_tomtit", func(t *testing.T) {
state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Tomtit", Certainty: -1})
assertFileSegCounts(t, state, 1, 1, "Species=Tomtit")
})

t.Run("species_nonexistent", func(t *testing.T) {
state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "NonExistent", Certainty: -1})
assertFileSegCounts(t, state, 0, 0, "Species=NonExistent")
})
}

// TestLoadDataFilesWithMixedSegments loads one file holding Kiwi, Tomtit and
// Kiwi segments with Species=Kiwi and checks that the file is kept, that the
// filtered total is 2, and that the DataFile itself still holds all 3
// segments.
//
// A wrong file count stops the test at once, so the later DataFiles[0] access
// cannot panic on an empty slice.
func TestLoadDataFilesWithMixedSegments(t *testing.T) {
	tempDir := t.TempDir()

	file := `[
		{"Operator": "test"},
		[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],
		[10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]],
		[20, 30, 100, 1000, [{"species": "Kiwi", "certainty": 95}]]
	]`
	writeDataFileContent(t, tempDir, "mixed.data", file)

	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})

	if len(state.DataFiles) != 1 {
		t.Fatalf("Expected 1 file, got %d", len(state.DataFiles))
	}

	// TotalSegments reflects the filtered view: only the two Kiwi segments.
	if got := state.TotalSegments(); got != 2 {
		t.Errorf("Species=Kiwi: expected 2 segments, got %d", got)
	}

	// The DataFile itself is not pruned and keeps all three segments.
	if got := len(state.DataFiles[0].Segments); got != 3 {
		t.Errorf("DataFile should have 3 segments internally, got %d", got)
	}
}

// TestFilteringDoesNotModifyOriginalSegments checks that loading with a
// species filter leaves the DataFile's own segment list intact: both the
// matching Kiwi segment and the non-matching Tomtit segment are still there,
// in file order.
//
// Missing files or segments stop the test at once instead of causing an
// index-out-of-range panic further down.
func TestFilteringDoesNotModifyOriginalSegments(t *testing.T) {
	tempDir := t.TempDir()

	file := `[
		{"Operator": "test"},
		[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],
		[10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]]
	]`
	writeDataFileContent(t, tempDir, "test.data", file)

	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})

	if len(state.DataFiles) == 0 {
		t.Fatal("expected the data file to be loaded, got none")
	}

	// Original segments should be untouched
	originalSegments := state.DataFiles[0].Segments
	if len(originalSegments) != 2 {
		t.Fatalf("Original should have 2 segments, got %d", len(originalSegments))
	}

	// Collect the first label's species from every segment that has labels.
	var species []string
	for _, seg := range originalSegments {
		if len(seg.Labels) > 0 {
			species = append(species, seg.Labels[0].Species)
		}
	}
	if len(species) != 2 || species[0] != "Kiwi" || species[1] != "Tomtit" {
		t.Errorf("Original segments should have both species, got %v", species)
	}
}

// TestLoadDataFilesCertaintyPruning loads two Kiwi files with certainty 70
// and 100 under Certainty=100 and checks that only one file and one segment
// remain, and that CurrentSegment returns a segment.
func TestLoadDataFilesCertaintyPruning(t *testing.T) {
	tempDir := t.TempDir()

	const (
		lowCertainty  = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`
		fullCertainty = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 100}]]]`
	)
	writeDataFileContent(t, tempDir, "file1.data", lowCertainty)
	writeDataFileContent(t, tempDir, "file2.data", fullCertainty)

	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Certainty: 100})
	assertFileSegCounts(t, state, 1, 1, "Certainty=100")

	// CurrentSegment should work (not nil) because file1 was pruned
	if current := state.CurrentSegment(); current == nil {
		t.Error("CurrentSegment should not be nil after pruning")
	}
}
file addition: calls_classify_filter_test.go (----------)

[0.67281]

package calls

import (
"math/rand"
"testing"

"skraak/utils"
)

// TestTotalSegmentsRespectsFilters checks that TotalSegments counts only the
// segments passing the configured filter name and species, across several
// data files and for a combined filter + species case.
func TestTotalSegmentsRespectsFilters(t *testing.T) {
	// seg builds a segment carrying a single label.
	seg := func(start, end float64, species, filter, callType string) *utils.Segment {
		return &utils.Segment{
			StartTime: start,
			EndTime:   end,
			Labels: []*utils.Label{
				{Species: species, Filter: filter, CallType: callType},
			},
		}
	}

	df1 := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			seg(0, 10, "Kiwi", "model-1.0", ""),
			seg(10, 20, "Tomtit", "model-1.0", ""),
		},
	}
	df2 := &utils.DataFile{
		FilePath: "/test/file2.data",
		Segments: []*utils.Segment{
			seg(0, 10, "Kiwi", "model-1.0", ""),
		},
	}
	// df3 mixes two filter names for the combined filter + species case.
	df3 := &utils.DataFile{
		FilePath: "/test/file3.data",
		Segments: []*utils.Segment{
			seg(0, 10, "Kiwi", "model-1.0", "Duet"),
			seg(10, 20, "Kiwi", "model-2.0", "Male"),
		},
	}

	pair := []*utils.DataFile{df1, df2}
	cases := []struct {
		cfg    ClassifyConfig
		files  []*utils.DataFile
		want   int
		format string
	}{
		{ClassifyConfig{Certainty: -1}, pair, 3, "No filters: expected 3 segments, got %d"},
		{ClassifyConfig{Species: "Kiwi", Certainty: -1}, pair, 2, "Species=Kiwi: expected 2 segments, got %d"},
		{ClassifyConfig{Species: "Tomtit", Certainty: -1}, pair, 1, "Species=Tomtit: expected 1 segment, got %d"},
		{ClassifyConfig{Filter: "model-1.0", Certainty: -1}, pair, 3, "Filter=model-1.0: expected 3 segments, got %d"},
		{ClassifyConfig{Species: "NonExistent", Certainty: -1}, pair, 0, "Species=NonExistent: expected 0 segments, got %d"},
		{ClassifyConfig{Filter: "model-1.0", Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df3}, 1, "Filter=model-1.0 + Species=Kiwi: expected 1 segment, got %d"},
	}

	for _, tc := range cases {
		if got := NewClassifyState(tc.cfg, tc.files).TotalSegments(); got != tc.want {
			t.Errorf(tc.format, got)
		}
	}
}

// TestCurrentSegmentNumberWithFilters checks that CurrentSegmentNumber counts
// positions within the filtered segments: with Species=Kiwi and the cursor on
// the first segment of the second file, the Kiwi segment in the first file
// precedes it, so the expected number is 2.
func TestCurrentSegmentNumberWithFilters(t *testing.T) {
	// labelled builds a segment carrying a single model-1.0 label.
	labelled := func(start, end float64, species string) *utils.Segment {
		return &utils.Segment{
			StartTime: start,
			EndTime:   end,
			Labels: []*utils.Label{
				{Species: species, Filter: "model-1.0"},
			},
		}
	}

	files := []*utils.DataFile{
		{
			FilePath: "/test/file1.data",
			Segments: []*utils.Segment{
				labelled(0, 10, "Kiwi"),
				labelled(10, 20, "Tomtit"),
			},
		},
		{
			FilePath: "/test/file2.data",
			Segments: []*utils.Segment{
				labelled(0, 10, "Kiwi"),
			},
		},
	}

	state := NewClassifyState(ClassifyConfig{Species: "Kiwi", Certainty: -1}, files)
	// Place the cursor on the second file's first filtered segment.
	state.FileIdx, state.SegmentIdx = 1, 0

	if got := state.CurrentSegmentNumber(); got != 2 {
		t.Errorf("Species=Kiwi, at file 2, seg 0: expected current segment 2, got %d", got)
	}
}

// TestCertaintyFiltering exercises the Certainty field of ClassifyConfig, on
// its own and combined with a species filter, and checks the resulting
// TotalSegments count for each configuration.
func TestCertaintyFiltering(t *testing.T) {
	// seg builds a 10-second segment carrying one "model-1.0" label.
	seg := func(start float64, species string, certainty int) *utils.Segment {
		return &utils.Segment{
			StartTime: start,
			EndTime:   start + 10,
			Labels: []*utils.Label{
				{Species: species, Filter: "model-1.0", Certainty: certainty},
			},
		}
	}

	// One file with three segments at differing certainty levels.
	file := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			seg(0, "Kiwi", 70),
			seg(10, "Kiwi", 100),
			seg(20, "Tomtit", 70),
		},
	}

	cases := []struct {
		config ClassifyConfig
		want   int
		format string // full failure message; %d receives the observed count
	}{
		// Certainty 70 matches the Kiwi and the Tomtit at 70.
		{ClassifyConfig{Certainty: 70}, 2, "Certainty=70: expected 2 segments, got %d"},
		// Certainty 100 matches only the second Kiwi.
		{ClassifyConfig{Certainty: 100}, 1, "Certainty=100: expected 1 segment, got %d"},
		// No label has certainty 0.
		{ClassifyConfig{Certainty: 0}, 0, "Certainty=0: expected 0 segments, got %d"},
		// Species and certainty combined.
		{ClassifyConfig{Species: "Kiwi", Certainty: 70}, 1, "Species=Kiwi + Certainty=70: expected 1 segment, got %d"},
	}

	for _, tc := range cases {
		state := NewClassifyState(tc.config, []*utils.DataFile{file})
		if got := state.TotalSegments(); got != tc.want {
			t.Errorf(tc.format, got)
		}
	}
}

// TestSampling checks applySampling against a pool of 10 segments spread over
// two files: the number of segments retained for several percentages, and
// that the retained files stay in their original order.
func TestSampling(t *testing.T) {
	// newSegments returns count consecutive one-second segments.
	newSegments := func(count int) []*utils.Segment {
		segs := make([]*utils.Segment, 0, count)
		for i := 0; i < count; i++ {
			segs = append(segs, &utils.Segment{StartTime: float64(i), EndTime: float64(i + 1)})
		}
		return segs
	}

	fileA := &utils.DataFile{FilePath: "/test/f1.data", Segments: newSegments(6)}
	fileB := &utils.DataFile{FilePath: "/test/f2.data", Segments: newSegments(4)}
	files := []*utils.DataFile{fileA, fileB}
	segsPerFile := [][]*utils.Segment{fileA.Segments, fileB.Segments}

	// totalOf sums the segment counts across all files.
	totalOf := func(groups [][]*utils.Segment) int {
		sum := 0
		for i := range groups {
			sum += len(groups[i])
		}
		return sum
	}

	// sampleAt runs applySampling with a fresh generator seeded with 42.
	sampleAt := func(pct int) ([]*utils.DataFile, [][]*utils.Segment) {
		return applySampling(files, segsPerFile, pct, rand.New(rand.NewSource(42)))
	}

	// 50% of 10 → 5
	sampledFiles, half := sampleAt(50)
	if got := totalOf(half); got != 5 {
		t.Errorf("sample 50%%: expected 5, got %d", got)
	}
	// Files must be in original chronological order
	for i := 1; i < len(sampledFiles); i++ {
		if sampledFiles[i].FilePath < sampledFiles[i-1].FilePath {
			t.Errorf("sample 50%%: files out of order at index %d", i)
		}
	}

	// 10% of 10 → 1
	_, tenth := sampleAt(10)
	if got := totalOf(tenth); got != 1 {
		t.Errorf("sample 10%%: expected 1, got %d", got)
	}

	// 1% of 10 → clamp to 1
	_, one := sampleAt(1)
	if got := totalOf(one); got != 1 {
		t.Errorf("sample 1%%: expected 1 (clamped), got %d", got)
	}

	// 99% of 10 → 9
	_, most := sampleAt(99)
	if got := totalOf(most); got != 9 {
		t.Errorf("sample 99%%: expected 9, got %d", got)
	}
}

// TestCertaintyPruning builds a state whose first file has no segment at the
// requested certainty and checks that TotalSegments only counts the match in
// the second file.
func TestCertaintyPruning(t *testing.T) {
	// kiwiFile builds a file holding one 10-second Kiwi segment labelled by
	// "model-1.0" at the given certainty.
	kiwiFile := func(path string, certainty int) *utils.DataFile {
		return &utils.DataFile{
			FilePath: path,
			Segments: []*utils.Segment{
				{
					StartTime: 0,
					EndTime:   10,
					Labels: []*utils.Label{
						{Species: "Kiwi", Filter: "model-1.0", Certainty: certainty},
					},
				},
			},
		}
	}

	// Simulate the bug: first file has no matching certainty segments
	unmatched := kiwiFile("/test/file1.data", 70)
	matched := kiwiFile("/test/file2.data", 100)

	// Without pruning (old bug): file1 is first, has no certainty=100 segments
	// CurrentSegment() would return nil even though TotalSegments() > 0
	cfg := ClassifyConfig{Certainty: 100}
	state := NewClassifyState(cfg, []*utils.DataFile{unmatched, matched})

	// TotalSegments should be 1 (only file2 has certainty 100)
	got := state.TotalSegments()
	if got != 1 {
		t.Errorf("Certainty=100: expected 1 segment, got %d", got)
	}

	// CurrentSegment should work if files are properly pruned
	// Note: this test assumes LoadDataFiles does the pruning
	// Here we test the state after manual construction
}

// TestCallTypeNoneFiltering checks how the CallType scope interacts with the
// species scope: utils.CallTypeNone (the "Kiwi+_" form), no calltype scope at
// all, and an explicit calltype.
func TestCallTypeNoneFiltering(t *testing.T) {
	// seg builds a 10-second segment with a single "model-1.0" label.
	seg := func(start float64, species, callType string) *utils.Segment {
		return &utils.Segment{
			StartTime: start,
			EndTime:   start + 10,
			Labels: []*utils.Label{
				{Species: species, Filter: "model-1.0", CallType: callType},
			},
		}
	}

	// Kiwi with calltype, Kiwi without, Tomtit without.
	file := &utils.DataFile{
		FilePath: "/test/file1.data",
		Segments: []*utils.Segment{
			seg(0, "Kiwi", "Male"),
			seg(10, "Kiwi", ""),   // no calltype
			seg(20, "Tomtit", ""), // no calltype, wrong species
		},
	}

	cases := []struct {
		config ClassifyConfig
		want   int
		format string // full failure message; %d receives the observed count
	}{
		// --species Kiwi+_ should match only Kiwi with no calltype.
		{
			ClassifyConfig{Species: "Kiwi", CallType: utils.CallTypeNone, Certainty: -1},
			1,
			"Species=Kiwi+_: expected 1 segment, got %d",
		},
		// --species Kiwi should still match all Kiwi.
		{
			ClassifyConfig{Species: "Kiwi", Certainty: -1},
			2,
			"Species=Kiwi: expected 2 segments, got %d",
		},
		// --species Kiwi+Male should still work as before.
		{
			ClassifyConfig{Species: "Kiwi", CallType: "Male", Certainty: -1},
			1,
			"Species=Kiwi+Male: expected 1 segment, got %d",
		},
	}

	for _, tc := range cases {
		state := NewClassifyState(tc.config, []*utils.DataFile{file})
		if got := state.TotalSegments(); got != tc.want {
			t.Errorf(tc.format, got)
		}
	}
}
file addition: calls_classify.go (----------)

[0.67281]

package calls

import (
"fmt"
"math/rand"
"os"
"path/filepath"
"slices"
"sort"
"strings"
"time"

"skraak/utils"
)

// KeyBinding maps a single key to the species (and optional calltype) that
// pressing it assigns. ParseKeyBuffer looks bindings up by Key and copies
// Species and CallType into a BindingResult.
type KeyBinding struct {
Key string // single char: "k", "n", "p"
Species string // "Kiwi", "Don't Know", "Morepork"
CallType string // "Duet", "Female", "Male" (optional)
}

// ClassifyConfig holds the configuration for classification: which .data
// files to load, how to scope the segments that are reviewed, and the key
// bindings used to label them.
type ClassifyConfig struct {
// Folder is searched for .data files when File is empty.
Folder string
// File, when non-empty, is used as the only .data file (takes precedence over Folder).
File string
// Filter is the label filter name used to select and edit labels; empty = no filter scope.
Filter string
Species string // scope to this species (optional)
CallType string // scope to this calltype within species (optional)
Certainty int // scope to this certainty value, -1 = no filter (optional)
Sample int // random sample percentage 1-99, -1 = no sampling, 100 = no-op
Goto string // goto this file on startup (optional, basename match)
// Reviewer is written to the file's Meta.Reviewer whenever a label is edited.
Reviewer string
// NOTE(review): Color, Sixel and ITerm look like display options for the TUI — confirm against the caller.
Color bool
ImageSize int // spectrogram display size in pixels (0 = default)
Sixel bool
ITerm bool
// Bindings are the primary key bindings searched by ParseKeyBuffer.
Bindings []KeyBinding
// SecondaryBindings maps a primary binding key to per-species calltype
// keys. Invoked via Shift+primary-key: the species is labeled without
// advancing, and the next key is interpreted as a calltype.
SecondaryBindings map[string]map[string]string
// Night keeps only files for which IsNight reports SolarNight.
Night bool
// Day keeps only files for which IsNight reports DiurnalActive.
Day bool
// Lat, Lng and Timezone are passed to IsNight when Night or Day is set.
Lat float64
Lng float64
Timezone string
}

// ClassifyState holds the current state for TUI: the loaded files, the
// per-file segments selected for review, and the cursor (FileIdx, SegmentIdx)
// into them.
type ClassifyState struct {
Config ClassifyConfig
// DataFiles are the files under review; filteredSegs is indexed in parallel.
DataFiles []*utils.DataFile
filteredSegs [][]*utils.Segment // cached at load time, parallel to DataFiles
totalSegs int // pre-computed total segment count
// FileIdx is the index of the current file in DataFiles / filteredSegs.
FileIdx int
// SegmentIdx is the index of the current segment within filteredSegs[FileIdx].
SegmentIdx int
// Dirty is set by the editing methods and cleared by a successful Save.
Dirty bool
// NOTE(review): Player is not used by the methods in this file; presumably owned by the TUI — confirm.
Player *utils.AudioPlayer
PlaybackSpeed float64 // Current playback speed (1.0 = normal, 0.5 = half speed)
TimeFilteredCount int // files skipped by --night or --day filter
}

// BindingResult represents parsed key result: the species and calltype that
// ApplyBinding writes onto the current segment's label.
type BindingResult struct {
Species string
CallType string // empty string = remove calltype
}

// findDataFilePaths resolves the list of .data file paths from config.
// An explicit config.File takes precedence and is returned as the only entry;
// otherwise config.Folder is searched with utils.FindDataFiles and any error
// from that search is wrapped and returned.
func findDataFilePaths(config ClassifyConfig) ([]string, error) {
	if config.File == "" {
		found, err := utils.FindDataFiles(config.Folder)
		if err != nil {
			return nil, fmt.Errorf("find data files: %w", err)
		}
		return found, nil
	}
	return []string{config.File}, nil
}

// filterDataFileSegments applies the label filters and the optional
// --night/--day filter to a single data file.
//
// It returns the matching segments, whether the file should be kept, and the
// number of files (0 or 1) rejected by the time-of-day filter. A file is
// dropped (nil, false) when no segments remain after label filtering,
// including a file that has no segments at all, or when the time-of-day check
// rejects it.
func filterDataFileSegments(df *utils.DataFile, config ClassifyConfig) ([]*utils.Segment, bool, int) {
	segs := filterSegmentsByLabel(df.Segments, config)
	// Test the length rather than nil-ness: an empty but non-nil segment list
	// has nothing to classify either, and keeping such a file would leave the
	// cursor on a file with no current segment.
	if len(segs) == 0 {
		return nil, false, 0
	}

	// The time-of-day check is only consulted when one of the flags is set.
	if !config.Night && !config.Day {
		return segs, true, 0
	}
	keep, skipped := filterByTimeOfDay(df.FilePath, config)
	if !keep {
		return nil, false, skipped
	}
	return segs, true, 0
}

// filterSegmentsByLabel selects the segments that satisfy the filter, species,
// calltype and certainty scope in config.
//
// When no filter name, no species and no certainty (Certainty < 0) is set, the
// input slice is returned untouched. Otherwise only segments for which
// SegmentMatchesFilters reports true are returned; the result is nil when none
// match, which the caller treats as "skip this file".
func filterSegmentsByLabel(segments []*utils.Segment, config ClassifyConfig) []*utils.Segment {
	unscoped := config.Filter == "" && config.Species == "" && config.Certainty < 0
	if unscoped {
		return segments
	}

	var matched []*utils.Segment
	for i := range segments {
		candidate := segments[i]
		if !candidate.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
			continue
		}
		matched = append(matched, candidate)
	}
	return matched // stays nil when nothing matched
}

// filterByTimeOfDay applies the --night/--day filter to one .data file.
//
// The audio path handed to IsNight is the .data path with its ".data" suffix
// removed and then cleaned. The result is (keep, timeFilteredCount): (true, 0)
// when the file passes, (false, 1) when it is rejected. A file is rejected
// when IsNight fails (a warning is printed to stderr), when --night is set and
// the recording is not in solar night, or when --day is set and it is not in
// the diurnally active period.
func filterByTimeOfDay(dataFilePath string, config ClassifyConfig) (bool, int) {
	wavPath := filepath.Clean(strings.TrimSuffix(dataFilePath, ".data"))
	input := IsNightInput{
		FilePath: wavPath,
		Lat:      config.Lat,
		Lng:      config.Lng,
		Timezone: config.Timezone,
	}

	result, err := IsNight(input)
	switch {
	case err != nil:
		fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
		return false, 1
	case config.Night && !result.SolarNight:
		return false, 1
	case config.Day && !result.DiurnalActive:
		return false, 1
	}
	return true, 0
}

// LoadDataFiles loads all .data files for classification.
//
// It parses and sorts the files named by config, applies the segment and
// time-of-day filters, optionally keeps a random sample of the remaining
// segments (when 0 < config.Sample < 100, seeded from the current time), and
// builds the resulting ClassifyState, positioned according to config.Goto.
func LoadDataFiles(config ClassifyConfig) (*ClassifyState, error) {
	parsed, err := parseAndSortDataFiles(config)
	if err != nil {
		return nil, err
	}

	files, segs, skipped := filterDataFiles(parsed, config)

	if sampling := config.Sample > 0 && config.Sample < 100; sampling {
		seed := time.Now().UnixNano()
		files, segs = applySampling(files, segs, config.Sample, rand.New(rand.NewSource(seed)))
	}

	return buildClassifyState(config, files, segs, skipped)
}

// parseAndSortDataFiles finds, parses, and sorts .data files from the config.
//
// Files that fail to parse are skipped with a warning on stderr rather than
// aborting the load. An error is returned when path discovery fails, when no
// paths are found, or when none of the found files parses. The returned files
// are sorted by FilePath in ascending order.
func parseAndSortDataFiles(config ClassifyConfig) ([]*utils.DataFile, error) {
	filePaths, err := findDataFilePaths(config)
	if err != nil {
		return nil, err
	}
	if len(filePaths) == 0 {
		return nil, fmt.Errorf("no .data files found")
	}

	dataFiles := make([]*utils.DataFile, 0, len(filePaths))
	for _, path := range filePaths {
		df, err := utils.ParseDataFile(path)
		if err != nil {
			// Best effort: keep loading the remaining files, but tell the
			// user which one was dropped and why instead of hiding it.
			fmt.Fprintf(os.Stderr, "warning: skipping %s (parse error: %v)\n", path, err)
			continue
		}
		dataFiles = append(dataFiles, df)
	}
	if len(dataFiles) == 0 {
		return nil, fmt.Errorf("no valid .data files")
	}

	sort.Slice(dataFiles, func(i, j int) bool {
		return dataFiles[i].FilePath < dataFiles[j].FilePath
	})

	return dataFiles, nil
}

// filterDataFiles runs filterDataFileSegments over every data file.
//
// It returns the files that were kept, their filtered segments (parallel to
// the kept files), and the accumulated time-of-day skip count reported by
// filterDataFileSegments for all files.
func filterDataFiles(dataFiles []*utils.DataFile, config ClassifyConfig) ([]*utils.DataFile, [][]*utils.Segment, int) {
	var (
		keptFiles   []*utils.DataFile
		keptSegs    [][]*utils.Segment
		skippedTime int
	)

	for i := range dataFiles {
		file := dataFiles[i]
		segs, keep, skipped := filterDataFileSegments(file, config)
		skippedTime += skipped
		if keep {
			keptFiles = append(keptFiles, file)
			keptSegs = append(keptSegs, segs)
		}
	}
	return keptFiles, keptSegs, skippedTime
}

// buildClassifyState constructs the ClassifyState, handling --goto file positioning.
//
// totalSegs is pre-computed as the sum of the per-file filtered segment
// counts. When config.Goto is set, the state is positioned on the first file
// whose base name equals it; an error is returned if no kept file matches.
func buildClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile, filteredSegs [][]*utils.Segment, timeFiltered int) (*ClassifyState, error) {
	total := 0
	for _, segs := range filteredSegs {
		total += len(segs)
	}

	state := &ClassifyState{
		Config:            config,
		DataFiles:         dataFiles,
		filteredSegs:      filteredSegs,
		totalSegs:         total,
		TimeFilteredCount: timeFiltered,
	}

	if config.Goto == "" {
		return state, nil
	}

	for i, df := range state.DataFiles {
		// filepath.Base honours the platform's path separator, unlike a
		// manual search for "/".
		if filepath.Base(df.FilePath) == config.Goto {
			state.FileIdx = i
			return state, nil
		}
	}
	return nil, fmt.Errorf("goto file not found (or has no matching segments): %s", config.Goto)
}

// applySampling randomly selects sample% of segments from the filtered set.
// The returned files and segments preserve the original chronological order.
//
// kept and cachedSegs are parallel slices and are not modified. The number of
// segments selected is total*sample/100 (integer division), clamped to the
// range [1, total]. Files left without any selected segment are dropped from
// the result. When there are no segments to sample from, (nil, nil) is
// returned instead of slicing past the end of an empty list.
func applySampling(kept []*utils.DataFile, cachedSegs [][]*utils.Segment, sample int, rng *rand.Rand) ([]*utils.DataFile, [][]*utils.Segment) {
	// segRef addresses one segment as (file index, index within that file).
	type segRef struct{ fileIdx, segIdx int }

	total := 0
	for _, segs := range cachedSegs {
		total += len(segs)
	}
	if total == 0 {
		// Nothing to pick from; no file would survive sampling.
		return nil, nil
	}

	flat := make([]segRef, 0, total)
	for fi, segs := range cachedSegs {
		for si := range segs {
			flat = append(flat, segRef{fi, si})
		}
	}

	// At least one segment, never more than exist.
	targetCount := min(max(total*sample/100, 1), total)

	rng.Shuffle(len(flat), func(i, j int) { flat[i], flat[j] = flat[j], flat[i] })
	selected := flat[:targetCount]

	// Restore chronological order before rebuilding
	sort.Slice(selected, func(i, j int) bool {
		if selected[i].fileIdx != selected[j].fileIdx {
			return selected[i].fileIdx < selected[j].fileIdx
		}
		return selected[i].segIdx < selected[j].segIdx
	})

	newCached := make([][]*utils.Segment, len(cachedSegs))
	for _, ref := range selected {
		newCached[ref.fileIdx] = append(newCached[ref.fileIdx], cachedSegs[ref.fileIdx][ref.segIdx])
	}

	// Drop files that ended up with no sampled segment.
	var newKept []*utils.DataFile
	var finalCached [][]*utils.Segment
	for i, segs := range newCached {
		if len(segs) > 0 {
			newKept = append(newKept, kept[i])
			finalCached = append(finalCached, segs)
		}
	}
	return newKept, finalCached
}

// FilteredSegs returns the cached filtered segments parallel to DataFiles.
// The internal slice itself is returned, not a copy.
func (s *ClassifyState) FilteredSegs() [][]*utils.Segment {
return s.filteredSegs
}

// CurrentFile returns the data file at FileIdx, or nil when FileIdx is at or
// past the end of DataFiles.
func (s *ClassifyState) CurrentFile() *utils.DataFile {
	if s.FileIdx < len(s.DataFiles) {
		return s.DataFiles[s.FileIdx]
	}
	return nil
}

// CurrentSegment returns the segment at (FileIdx, SegmentIdx) within the
// filtered segments, or nil when either index is at or past the end of its
// list.
func (s *ClassifyState) CurrentSegment() *utils.Segment {
	if s.FileIdx < len(s.filteredSegs) {
		if fileSegs := s.filteredSegs[s.FileIdx]; s.SegmentIdx < len(fileSegs) {
			return fileSegs[s.SegmentIdx]
		}
	}
	return nil
}

// TotalSegments returns total segments to review. The value is the
// pre-computed totalSegs field; it is not recounted on each call.
func (s *ClassifyState) TotalSegments() int {
return s.totalSegs
}

// CurrentSegmentNumber returns the 1-based position of the current segment
// across all files: the filtered segment counts of every file before FileIdx,
// plus SegmentIdx, plus one.
func (s *ClassifyState) CurrentSegmentNumber() int {
	number := s.SegmentIdx + 1
	for fileIdx := 0; fileIdx < s.FileIdx; fileIdx++ {
		number += len(s.filteredSegs[fileIdx])
	}
	return number
}

// NextSegment advances the cursor by one segment: to the next segment of the
// current file if there is one, otherwise to the first segment of the next
// file. It returns false, leaving the cursor unchanged, when the cursor is
// already on the last segment of the last file or FileIdx is out of range.
func (s *ClassifyState) NextSegment() bool {
	if s.FileIdx >= len(s.filteredSegs) {
		return false
	}

	switch {
	case s.SegmentIdx+1 < len(s.filteredSegs[s.FileIdx]):
		// Still inside the current file.
		s.SegmentIdx++
	case s.FileIdx+1 < len(s.DataFiles):
		// Roll over to the start of the following file.
		s.FileIdx, s.SegmentIdx = s.FileIdx+1, 0
	default:
		return false
	}
	return true
}

// PrevSegment moves the cursor back by one segment: to the previous segment
// of the current file if there is one, otherwise to the last segment of the
// previous file (index 0 if that file has no segments). It returns false when
// the cursor is already at the first segment of the first file.
func (s *ClassifyState) PrevSegment() bool {
	switch {
	case s.SegmentIdx > 0:
		s.SegmentIdx--
	case s.FileIdx > 0:
		s.FileIdx--
		s.SegmentIdx = max(len(s.filteredSegs[s.FileIdx])-1, 0)
	default:
		return false
	}
	return true
}

// ParseKeyBuffer looks key up among the configured primary bindings and
// returns a new BindingResult holding the species and calltype of the first
// binding whose Key equals it, or nil when no binding matches.
func (s *ClassifyState) ParseKeyBuffer(key string) *BindingResult {
	idx := slices.IndexFunc(s.Config.Bindings, func(b KeyBinding) bool {
		return b.Key == key
	})
	if idx < 0 {
		return nil
	}
	match := s.Config.Bindings[idx]
	return &BindingResult{Species: match.Species, CallType: match.CallType}
}

// SetComment stores comment on the current segment's first label matching
// the configured filter and returns the comment it replaced (for undo).
//
// When the segment has no matching label, a new "Don't Know" label with
// certainty 0 carrying the comment is appended and "" is returned. The file's
// reviewer is set and the state is marked dirty. If there is no current
// segment or file, nothing changes and "" is returned.
func (s *ClassifyState) SetComment(comment string) string {
	seg, df := s.CurrentSegment(), s.CurrentFile()
	if seg == nil || df == nil {
		return ""
	}

	df.Meta.Reviewer = s.Config.Reviewer

	previous := ""
	if matching := seg.GetFilterLabels(s.Config.Filter); len(matching) > 0 {
		// Overwrite the comment on the first matching label.
		previous = matching[0].Comment
		matching[0].Comment = comment
	} else {
		// Nothing to attach the comment to yet: create a placeholder label.
		seg.Labels = append(seg.Labels, &utils.Label{
			Species:   "Don't Know",
			Certainty: 0,
			Filter:    s.Config.Filter,
			Comment:   comment,
		})
	}

	s.Dirty = true
	return previous
}

// GetCurrentComment returns the comment of the current segment's first label
// matching the configured filter, or "" when there is no current segment or
// no matching label.
func (s *ClassifyState) GetCurrentComment() string {
	if seg := s.CurrentSegment(); seg != nil {
		if matching := seg.GetFilterLabels(s.Config.Filter); len(matching) > 0 {
			return matching[0].Comment
		}
	}
	return ""
}

// ApplyBinding applies a binding result to the current segment.
//
// The first label matching the configured filter receives the species,
// calltype (empty removes it) and certainty: 0 for "Don't Know", 100 for
// anything else. Any further matching labels are removed. If no label
// matches, a new one is appended. The segment's labels are then re-sorted by
// species, the file's reviewer is set, and the state is marked dirty.
// It is a no-op when there is no current segment or file.
func (s *ClassifyState) ApplyBinding(result *BindingResult) {
	seg := s.CurrentSegment()
	if seg == nil {
		return
	}

	df := s.CurrentFile()
	if df == nil {
		return
	}

	// Set reviewer
	df.Meta.Reviewer = s.Config.Reviewer

	// Determine certainty: 0 for Don't Know, 100 for others
	certainty := 100
	if result.Species == "Don't Know" {
		certainty = 0
	}

	// Get labels matching filter
	filterLabels := seg.GetFilterLabels(s.Config.Filter)
	if len(filterLabels) == 0 {
		// No matching labels, add new one
		seg.Labels = append(seg.Labels, &utils.Label{
			Species:   result.Species,
			Certainty: certainty,
			Filter:    s.Config.Filter,
			CallType:  result.CallType,
		})
	} else {
		// Edit first matching label, remove rest
		primary := filterLabels[0]
		primary.Species = result.Species
		primary.Certainty = certainty
		primary.CallType = result.CallType // always set (empty = remove)

		if extras := filterLabels[1:]; len(extras) > 0 {
			var remaining []*utils.Label
			for _, l := range seg.Labels {
				if !slices.Contains(extras, l) {
					remaining = append(remaining, l)
				}
			}
			seg.Labels = remaining
		}
	}

	// Re-sort labels. The sort must be stable so that labels sharing a
	// species keep their relative order; otherwise the "first matching label"
	// and the order written to disk could vary between runs.
	sort.SliceStable(seg.Labels, func(i, j int) bool {
		return seg.Labels[i].Species < seg.Labels[j].Species
	})

	s.Dirty = true
}

// ApplyCallTypeOnly sets the CallType on the current segment's first
// filter-matching label, sets the file's reviewer and marks the state dirty.
// Used after a Shift+primary keypress labeled the species and we now receive
// the secondary key for the calltype.
// No-op if there is no current segment, no current file, or no matching
// label to update.
func (s *ClassifyState) ApplyCallTypeOnly(callType string) {
	seg, df := s.CurrentSegment(), s.CurrentFile()
	if seg == nil || df == nil {
		return
	}

	matching := seg.GetFilterLabels(s.Config.Filter)
	if len(matching) > 0 {
		df.Meta.Reviewer = s.Config.Reviewer
		matching[0].CallType = callType
		s.Dirty = true
	}
}

// HasSecondary reports whether the given primary key has any secondary
// (calltype) bindings configured, i.e. whether its entry in
// Config.SecondaryBindings is non-empty. A missing key yields false.
func (s *ClassifyState) HasSecondary(primaryKey string) bool {
return len(s.Config.SecondaryBindings[primaryKey]) > 0
}

// ConfirmLabel raises the certainty of the current segment's first
// filter-matching label to 100, sets the file's reviewer and marks the state
// dirty. It returns true only when that change was made.
//
// It returns false without changing anything when there is no current
// segment, file or matching label, when the label is already at 100, or when
// it is at 0 (a Don't Know): confirming a Don't Know is a no-op and the
// caller should just advance to the next segment.
func (s *ClassifyState) ConfirmLabel() bool {
	seg := s.CurrentSegment()
	if seg == nil {
		return false
	}

	matching := seg.GetFilterLabels(s.Config.Filter)
	if len(matching) == 0 {
		return false
	}

	target := matching[0]
	switch target.Certainty {
	case 0, 100:
		// Don't Know, or already confirmed: nothing to write.
		return false
	}

	df := s.CurrentFile()
	if df == nil {
		return false
	}

	df.Meta.Reviewer = s.Config.Reviewer
	target.Certainty = 100
	s.Dirty = true
	return true
}

// Save writes the current file back to its own path when the state is dirty
// and clears the dirty flag on success. It returns nil without writing when
// there is no current file or nothing has changed, and returns the write
// error (leaving the state dirty) if the write fails.
func (s *ClassifyState) Save() error {
	df := s.CurrentFile()
	if df == nil || !s.Dirty {
		return nil
	}

	if err := df.Write(df.FilePath); err != nil {
		return err
	}

	s.Dirty = false
	return nil
}

// getFilterLabel returns the first label of seg whose Filter equals the
// configured filter. With no filter configured it returns the segment's first
// label. It returns nil when no such label exists.
func (s *ClassifyState) getFilterLabel(seg *utils.Segment) *utils.Label {
	if filter := s.Config.Filter; filter != "" {
		for _, candidate := range seg.Labels {
			if candidate.Filter == filter {
				return candidate
			}
		}
		return nil
	}

	// No filter configured: fall back to the first label, if any.
	if len(seg.Labels) == 0 {
		return nil
	}
	return seg.Labels[0]
}

// getOrCreateFilterLabel returns the label found by getFilterLabel, or, when
// there is none, appends a new "Don't Know" label (certainty 0) for the
// configured filter to seg, marks the state dirty and returns it.
func (s *ClassifyState) getOrCreateFilterLabel(seg *utils.Segment) *utils.Label {
	if existing := s.getFilterLabel(seg); existing != nil {
		return existing
	}

	created := &utils.Label{
		Species:   "Don't Know",
		Certainty: 0,
		Filter:    s.Config.Filter,
	}
	seg.Labels = append(seg.Labels, created)
	s.Dirty = true
	return created
}

// HasBookmark reports whether the current segment's filter label (as chosen
// by getFilterLabel) is bookmarked. It is false when there is no current
// segment or no such label.
func (s *ClassifyState) HasBookmark() bool {
	if seg := s.CurrentSegment(); seg != nil {
		if label := s.getFilterLabel(seg); label != nil {
			return label.Bookmark
		}
	}
	return false
}

// ToggleBookmark flips the bookmark flag on the current segment's filter
// label, creating that label first if the segment has none. The file's
// reviewer is set and the state is marked dirty. It is a no-op when there is
// no current segment or file.
func (s *ClassifyState) ToggleBookmark() {
	seg, df := s.CurrentSegment(), s.CurrentFile()
	if seg == nil || df == nil {
		return
	}

	df.Meta.Reviewer = s.Config.Reviewer

	target := s.getOrCreateFilterLabel(seg)
	target.Bookmark = !target.Bookmark
	s.Dirty = true
}

// NextBookmark moves the cursor forward to the next bookmarked segment,
// wrapping from the last segment to the first. It returns true when the
// cursor lands on a bookmarked segment, and false once the search has come
// back to the starting position without finding one.
func (s *ClassifyState) NextBookmark() bool {
	originFile, originSeg := s.FileIdx, s.SegmentIdx

	for step := 0; ; step++ {
		// Advance one segment, wrapping to the very first one at the end.
		if moved := s.NextSegment(); !moved {
			s.FileIdx, s.SegmentIdx = 0, 0
		}

		// After the first step, arriving back at the origin means the whole
		// circle was searched without a hit.
		if step > 0 && s.FileIdx == originFile && s.SegmentIdx == originSeg {
			return false
		}

		if s.hasFilterBookmark() {
			return true
		}
	}
}

// PrevBookmark navigates to the previous bookmark, wrapping around if needed.
// Returns false if no bookmarks found (back at start position), or when the
// state holds no files at all.
func (s *ClassifyState) PrevBookmark() bool {
	// With nothing loaded the wrap-around below would compute index -1 and
	// panic; there is no bookmark to find in that case.
	if len(s.DataFiles) == 0 || len(s.filteredSegs) == 0 {
		return false
	}

	startFile := s.FileIdx
	startSeg := s.SegmentIdx
	first := true

	for {
		// Move to previous segment
		if !s.PrevSegment() {
			// Wrap to end of folder
			s.FileIdx = len(s.DataFiles) - 1
			segs := s.filteredSegs[s.FileIdx]
			s.SegmentIdx = max(len(segs)-1, 0)
		}

		// Check if we've looped back to start
		if !first && s.FileIdx == startFile && s.SegmentIdx == startSeg {
			return false // full circle, no bookmark found
		}
		first = false

		// Check if current segment has bookmark
		if s.hasFilterBookmark() {
			return true
		}
	}
}

// hasFilterBookmark reports whether the label chosen by getFilterLabel for
// the current segment has its Bookmark flag set. It is false when there is no
// current segment or no such label.
func (s *ClassifyState) hasFilterBookmark() bool {
	seg := s.CurrentSegment()
	if seg == nil {
		return false
	}
	if label := s.getFilterLabel(seg); label != nil {
		return label.Bookmark
	}
	return false
}

// FormatLabels renders labels as a single comma-separated display string.
//
// When filter is non-empty, only labels with that Filter are included. Each
// label is rendered as `Species[/CallType] (Certainty%)`, followed by
// ` [Filter]` when the label has a filter and by the quoted comment when it
// has one. The result is "" when no label is included.
func FormatLabels(labels []*utils.Label, filter string) string {
	rendered := make([]string, 0, len(labels))
	for _, label := range labels {
		if filter == "" || label.Filter == filter {
			var b strings.Builder
			b.WriteString(label.Species)
			if label.CallType != "" {
				b.WriteString("/")
				b.WriteString(label.CallType)
			}
			fmt.Fprintf(&b, " (%d%%)", label.Certainty)
			if label.Filter != "" {
				b.WriteString(" [")
				b.WriteString(label.Filter)
				b.WriteString("]")
			}
			if label.Comment != "" {
				fmt.Fprintf(&b, " \"%s\"", label.Comment)
			}
			rendered = append(rendered, b.String())
		}
	}
	return strings.Join(rendered, ", ")
}
file addition: avianz_types.go (----------)

[0.67281]

package calls

// AviaNZMeta is the metadata element in a .data file.
// Reviewer is a pointer with omitempty, so a nil reviewer is left out of the
// encoded JSON entirely.
type AviaNZMeta struct {
Operator string `json:"Operator"`
Reviewer *string `json:"Reviewer,omitempty"`
Duration float64 `json:"Duration"`
}

// AviaNZLabel represents a species label in a segment, encoded in JSON with
// the lowercase keys "species", "certainty" and "filter".
type AviaNZLabel struct {
Species string `json:"species"`
Certainty int `json:"certainty"`
Filter string `json:"filter"`
}

// AviaNZSegment represents a detection segment [start, end, freq_low, freq_high, labels].
// It is a fixed five-element array of untyped values, so element types are
// not enforced by the compiler.
type AviaNZSegment [5]any
file addition: resolve.go (----------)

[6.790921]

package db

// ResolveDBPath picks the database path to use: inputPath when it is
// non-empty, otherwise fallback. It is meant for tools that accept an
// explicit DBPath in their Input struct but need a default when none is
// provided.
func ResolveDBPath(inputPath, fallback string) string {
	if inputPath == "" {
		return fallback
	}
	return inputPath
}
edit in cmd/sql.go at line 57

[6.1043575]→[6.1043575:1043602](∅→∅)

tools.SetDBPath(*dbPath)
edit in cmd/pattern.go at line 65

[6.1055793]→[6.1055793:1055819](∅→∅)

tools.SetDBPath(*dbPath)
edit in cmd/pattern.go at line 124

[6.1057759]→[6.1057759:1057785](∅→∅)

tools.SetDBPath(*dbPath)
edit in cmd/location.go at line 68

[6.1063427]→[6.1063427:1063453](∅→∅)

tools.SetDBPath(*dbPath)
edit in cmd/location.go at line 118

[6.1065252]→[6.19731:19757](∅→∅)

tools.SetDBPath(*dbPath)
replacement in cmd/isnight.go at line 9

[6.1066387]→[6.1066387:1066403](∅→∅)

"skraak/tools"

[6.1066387]

[6.1066403]

"skraak/tools/calls"
replacement in cmd/isnight.go at line 72

[6.1069377]→[6.1069377:1069427](∅→∅)

output, err := tools.IsNight(tools.IsNightInput{

[6.1069377]

[6.1069427]

output, err := calls.IsNight(calls.IsNightInput{
replacement in cmd/import.go at line 10

[6.1070091]→[6.1070091:1070107](∅→∅)

"skraak/tools"

[6.1070091]

[6.1070107]

imp "skraak/tools/import"
edit in cmd/import.go at line 94

[6.1074064]→[6.1074064:1074090](∅→∅)

tools.SetDBPath(*dbPath)
replacement in cmd/import.go at line 97

[6.1074123]→[6.1074123:1074160](∅→∅)

input := tools.BulkFileImportInput{

[6.1074123]

[4.7134]

input := imp.BulkFileImportInput{
replacement in cmd/import.go at line 111

[6.1074584]→[6.1074584:1074650](∅→∅)

output, err := tools.BulkFileImport(context.Background(), input)

[6.1074584]

[6.1074650]

output, err := imp.BulkFileImport(context.Background(), input)
edit in cmd/import.go at line 164

[6.1115]→[6.1077012:1077039](∅→∅),[6.3606]→[6.1077012:1077039](∅→∅),[6.1077012]→[6.1077012:1077039](∅→∅)

tools.SetDBPath(*dbPath)
replacement in cmd/import.go at line 167

[6.1077072]→[6.1077072:1077105](∅→∅)

input := tools.ImportFileInput{

[6.1077072]

[4.7159]

input := imp.ImportFileInput{
replacement in cmd/import.go at line 177

[6.1077273]→[6.1077273:1077335](∅→∅)

output, err := tools.ImportFile(context.Background(), input)

[6.1077273]

[6.1077335]

output, err := imp.ImportFile(context.Background(), input)
edit in cmd/import.go at line 230

[6.1459]→[6.1079771:1079798](∅→∅),[6.3744]→[6.1079771:1079798](∅→∅),[6.1079771]→[6.1079771:1079798](∅→∅)

tools.SetDBPath(*dbPath)
replacement in cmd/import.go at line 233

[6.1079831]→[6.1079831:1079870](∅→∅)

input := tools.ImportAudioFilesInput{

[6.1079831]

[4.7183]

input := imp.ImportAudioFilesInput{
replacement in cmd/import.go at line 247

[6.1080148]→[6.1080148:1080216](∅→∅)

output, err := tools.ImportAudioFiles(context.Background(), input)

[6.1080148]

[6.1080216]

output, err := imp.ImportAudioFiles(context.Background(), input)
edit in cmd/import.go at line 334

[6.1860]→[6.1084474:1084501](∅→∅),[6.3909]→[6.1084474:1084501](∅→∅),[6.1084474]→[6.1084474:1084501](∅→∅)

tools.SetDBPath(*dbPath)
replacement in cmd/import.go at line 337

[6.1084534]→[6.1084534:1084571](∅→∅)

input := tools.ImportSegmentsInput{

[6.1084534]

[4.7207]

input := imp.ImportSegmentsInput{
replacement in cmd/import.go at line 358

[6.1085170]→[6.1085170:1085236](∅→∅)

output, err := tools.ImportSegments(context.Background(), input)

[6.1085170]

[6.1085236]

output, err := imp.ImportSegments(context.Background(), input)
edit in cmd/import.go at line 419

[6.2220]→[6.1088110:1088137](∅→∅),[6.3995]→[6.1088110:1088137](∅→∅),[6.1088110]→[6.1088110:1088137](∅→∅)

tools.SetDBPath(*dbPath)
replacement in cmd/import.go at line 422

[6.1088170]→[6.1088170:1088211](∅→∅)

input := tools.ImportUnstructuredInput{

[6.1088170]

[4.7231]

input := imp.ImportUnstructuredInput{
replacement in cmd/import.go at line 435

[6.1088511]→[6.1088511:1088581](∅→∅)

output, err := tools.ImportUnstructured(context.Background(), input)

[6.1088511]

[6.1088581]

output, err := imp.ImportUnstructured(context.Background(), input)
edit in cmd/export.go at line 76

[6.4073]→[6.1091992:1092019](∅→∅),[6.6873]→[6.1091992:1092019](∅→∅),[6.1091992]→[6.1091992:1092019](∅→∅)

tools.SetDBPath(*dbPath)
edit in cmd/dataset.go at line 53

[6.1094501]→[6.1094501:1094527](∅→∅)

tools.SetDBPath(*dbPath)
edit in cmd/dataset.go at line 99

[6.1096234]→[6.1096234:1096260](∅→∅)

tools.SetDBPath(*dbPath)
edit in cmd/cluster.go at line 63

[6.1101538]→[6.1101538:1101564](∅→∅)

tools.SetDBPath(*dbPath)
edit in cmd/cluster.go at line 119

[6.1103457]→[6.1103457:1103483](∅→∅)

tools.SetDBPath(*dbPath)
replacement in cmd/calls_push_certainty.go at line 8

[6.1104046]→[6.1104046:1104062](∅→∅)

"skraak/tools"

[6.1104046]

[6.1104062]

"skraak/tools/calls"
replacement in cmd/calls_push_certainty.go at line 135

[6.1572]→[6.1108868:1108906](∅→∅),[6.1108868]→[6.1108868:1108906](∅→∅)

config := tools.PushCertaintyConfig{

[6.1572]

[6.26591]

config := calls.PushCertaintyConfig{
replacement in cmd/calls_push_certainty.go at line 149

[6.1109142]→[6.1109142:1109186](∅→∅)

result, err := tools.PushCertainty(config)

[6.1109142]

[6.1109186]

result, err := calls.PushCertainty(config)
replacement in cmd/calls_propagate.go at line 9

[6.1109860]→[6.1109860:1109876](∅→∅)

"skraak/tools"

[6.1109860]

[6.1109876]

"skraak/tools/calls"
replacement in cmd/calls_propagate.go at line 121

[6.1115880]→[6.1115880:1115945](∅→∅)

result, err := tools.CallsPropagate(tools.CallsPropagateInput{

[6.1115880]

[6.1115945]

result, err := calls.CallsPropagate(calls.CallsPropagateInput{
replacement in cmd/calls_propagate.go at line 136

[6.1116268]→[6.1116268:1116344](∅→∅)

result, err := tools.CallsPropagateFolder(tools.CallsPropagateFolderInput{

[6.1116268]

[6.1116344]

result, err := calls.CallsPropagateFolder(calls.CallsPropagateFolderInput{
replacement in cmd/calls_modify.go at line 10

[6.1117201]→[6.1117201:1117217](∅→∅)

"skraak/tools"

[6.1117201]

[6.1117217]

"skraak/tools/calls"
replacement in cmd/calls_modify.go at line 159

[6.27390]→[6.1123339:1123373](∅→∅),[6.1123339]→[6.1123339:1123373](∅→∅)

input := tools.CallsModifyInput{

[6.27390]

[6.27391]

input := calls.CallsModifyInput{
replacement in cmd/calls_modify.go at line 172

[6.1123589]→[6.1123589:1123630](∅→∅)

result, err := tools.CallsModify(input)

[6.1123577]

[6.1123630]

result, err := calls.CallsModify(input)
replacement in cmd/calls_detect_anomalies.go at line 8

[6.1123892]→[6.1123892:1123908](∅→∅)

"skraak/tools"

[6.1123892]

[6.1123908]

"skraak/tools/calls"
replacement in cmd/calls_detect_anomalies.go at line 110

[6.1127861]→[6.1127861:1127927](∅→∅)

output, err := tools.DetectAnomalies(tools.DetectAnomaliesInput{

[6.1127861]

[6.1127927]

output, err := calls.DetectAnomalies(calls.DetectAnomaliesInput{
replacement in cmd/calls_clip_labels.go at line 10

[6.1128642]→[6.1128642:1128658](∅→∅)

"skraak/tools"

[6.1128642]

[6.1128658]

"skraak/tools/calls"
replacement in cmd/calls_clip_labels.go at line 52

[6.1131079]→[6.1131079:1131117](∅→∅)

input := tools.CallsClipLabelsInput{

[6.1131079]

[6.1131117]

input := calls.CallsClipLabelsInput{
replacement in cmd/calls_clip_labels.go at line 72

[6.1131766]→[6.1131766:1131808](∅→∅)

out, err := tools.CallsClipLabels(input)

[6.1131766]

[6.1131808]

out, err := calls.CallsClipLabels(input)
replacement in cmd/calls_clip.go at line 9

[6.1133159]→[6.1133159:1133175](∅→∅)

"skraak/tools"

[6.1133159]

[6.1133175]

"skraak/tools/calls"
replacement in cmd/calls_clip.go at line 148

[6.1141025]→[6.1141025:1141057](∅→∅)

input := tools.CallsClipInput{

[6.1141025]

[6.28933]

input := calls.CallsClipInput{
replacement in cmd/calls_clip.go at line 164

[6.1141380]→[6.1141380:1141419](∅→∅)

result, err := tools.CallsClip(input)

[6.1141380]

[6.1141419]

result, err := calls.CallsClip(input)
replacement in cmd/calls_classify.go at line 10

[6.1141784]→[6.1141784:1141800](∅→∅)

"skraak/tools"

[6.1141784]

[6.1141800]

"skraak/tools/calls"
replacement in cmd/calls_classify.go at line 155

[6.8424]→[6.8424:8511](∅→∅)

func validateBindings(cfg *utils.Config, cfgPath string) ([]tools.KeyBinding, error) {

[6.8424]

[6.1149871]

func validateBindings(cfg *utils.Config, cfgPath string) ([]calls.KeyBinding, error) {
replacement in cmd/calls_classify.go at line 157

[6.1149949]→[6.1149949:1150018](∅→∅)

bindings := make([]tools.KeyBinding, 0, len(cfg.Classify.Bindings))

[6.1149949]

[6.1150018]

bindings := make([]calls.KeyBinding, 0, len(cfg.Classify.Bindings))
replacement in cmd/calls_classify.go at line 236

[6.1151645]→[6.1151645:1151678](∅→∅)

config := tools.ClassifyConfig{

[6.1151645]

[6.12323]

config := calls.ClassifyConfig{
replacement in cmd/calls_classify.go at line 260

[6.1152375]→[6.1152375:1152418](∅→∅)

state, err := tools.LoadDataFiles(config)

[6.1152375]

[6.1152418]

state, err := calls.LoadDataFiles(config)
replacement in cmd/calls_classify.go at line 290

[6.1153232]→[6.1153232:1153276](∅→∅)

func parseBind(s string) tools.KeyBinding {

[6.1153232]

[6.1153276]

func parseBind(s string) calls.KeyBinding {
replacement in cmd/calls_classify.go at line 302

[6.1153592]→[6.1153592:1153619](∅→∅)

return tools.KeyBinding{

[6.1153592]

[6.1153619]

return calls.KeyBinding{
replacement in cmd/calls_classify.go at line 310

[6.1153718]→[6.1153718:1153744](∅→∅)

return tools.KeyBinding{

[6.1153718]

[6.1153744]

return calls.KeyBinding{
replacement in cmd/calls.go at line 9

[6.1153889]→[6.1153889:1153905](∅→∅)

"skraak/tools"

[6.1153889]

[6.1153905]

"skraak/tools/calls"
replacement in cmd/calls.go at line 148

[6.1160560]→[6.1160560:1160615](∅→∅)

filterName = tools.ParseFilterFromFilename(*csvPath)

[6.1160560]

[6.1160615]

filterName = calls.ParseFilterFromFilename(*csvPath)
replacement in cmd/calls.go at line 154

[6.1160882]→[6.1160882:1160919](∅→∅)

input := tools.CallsFromPredsInput{

[6.1160882]

[6.1160919]

input := calls.CallsFromPredsInput{
replacement in cmd/calls.go at line 181

[6.1161814]→[6.1161814:1161858](∅→∅)

output, err := tools.CallsFromPreds(input)

[6.1161814]

[6.1161858]

output, err := calls.CallsFromPreds(input)
replacement in cmd/calls.go at line 234

[6.1163871]→[6.1163871:1163909](∅→∅)

input := tools.CallsShowImagesInput{

[6.1163871]

[6.1163909]

input := calls.CallsShowImagesInput{
replacement in cmd/calls.go at line 247

[6.1164198]→[6.1164198:1164243](∅→∅)

output, err := tools.CallsShowImages(input)

[6.1164198]

[6.1164243]

output, err := calls.CallsShowImages(input)
replacement in cmd/calls.go at line 312

[6.1167078]→[6.1167078:1167115](∅→∅)

input := tools.CallsFromBirdaInput{

[6.1167078]

[6.1167115]

input := calls.CallsFromBirdaInput{
replacement in cmd/calls.go at line 337

[6.1167752]→[6.1167752:1167796](∅→∅)

output, err := tools.CallsFromBirda(input)

[6.1167752]

[6.1167796]

output, err := calls.CallsFromBirda(input)
replacement in cmd/calls.go at line 417

[6.1171205]→[6.1171205:1171242](∅→∅)

input := tools.CallsFromRavenInput{

[6.1171205]

[6.1171242]

input := calls.CallsFromRavenInput{
replacement in cmd/calls.go at line 442

[6.1171878]→[6.1171878:1171922](∅→∅)

output, err := tools.CallsFromRaven(input)

[6.1171878]

[6.1171922]

output, err := calls.CallsFromRaven(input)
replacement in cmd/calls.go at line 544

[6.1176445]→[6.1176445:1176482](∅→∅)

input := tools.CallsSummariseInput{

[6.1176445]

[6.1176482]

input := calls.CallsSummariseInput{
replacement in cmd/calls.go at line 555

[6.1176685]→[6.1176685:1176729](∅→∅)

output, err := tools.CallsSummarise(input)

[6.1176685]

[6.1176729]

output, err := calls.CallsSummarise(input)
replacement in CLAUDE.md at line 20

[6.1196332]→[3.9861:9940](∅→∅)

tools/*.go → CLI tools (one file per tool, defines input/output types)

[6.1196332]

[6.7389]

tools/*.go → CLI tools: sql, export, cluster, dataset, location, pattern, time, prepend
tools/calls/ → Call processing (filesystem .data/WAV, NO database access)
tools/import/ → Import operations (bulk, file, files, segments, unstructured)
edit in CHANGELOG.md at line 4

[6.1198010]

[5.173]

## [2026-05-12] Stream 7: tools/ package split + SetDBPath removal

Split tools/ into three packages to improve navigation and reduce coupling:

### tools/calls/ (13 source + 11 test + 3 utility files, 4563 lines)
- All calls_* processing — purely filesystem-based, NO database access
- Utility files: avianz_types.go, parallel_aggregate.go, isnight.go
- Package name: `calls` (import: `skraak/tools/calls`)

### tools/import/ (5 source + 1 test files, 2078 lines)
- import_file, import_files, import_segments, import_unstructured, bulk_file_import
- Package name: `imp` (import: `imp "skraak/tools/import"`)
(`import` is a Go keyword, so `imp` is used as the package identifier)

### tools/ (8 source + 4 test files, remaining ~1700 lines)
- sql, export, cluster, dataset, location, pattern, time, prepend
edit in CHANGELOG.md at line 22

[5.174]

[5.174]

### SetDBPath removal
- Removed global `var dbPath string` and `SetDBPath()` from tools/sql.go
- All callers already pass `Input.DBPath` — the global was redundant
- Test files updated: `SetDBPath(testDB)` → `DBPath: testDB` in Input structs
- Added `db.ResolveDBPath()` helper that replaces the per-package `resolveDBPath()` pattern

### depguard updates
- New rules for tools/calls/ and tools/import/ packages
- tui/ may import tools/calls but not tools
- tools/ may not import sub-packages
- tools/calls/ and tools/import/ may not import parent tools/ package

### Cross-boundary dependency resolution
- `resolveDBPath()` → each package calls `db.ResolveDBPath()` directly
- `calls_clip_bench_test.go` path fix: `../audio/` → `../../audio/`
- No unexported symbols cross package boundaries (verified by analysis)
replacement in .golangci.yml at line 39

[6.3780]→[6.3780:3822](∅→∅)

# cmd → tools, tui, utils, db

[6.3780]

[6.3822]

# cmd → tools, tools/calls, tools/import, tui, utils, db
# tools/calls → utils, db
# tools/import → utils, db
replacement in .golangci.yml at line 43

[6.3854]→[6.3854:3887](∅→∅)

# tui → tools, utils

[6.3854]

[6.3887]

# tui → tools/calls, utils
edit in .golangci.yml at line 76

[6.4899]

[6.4899]

- pkg: "skraak/tools$"
desc: "tui must import from tools/calls, not tools"
calls:
files:
- "**/tools/calls/*.go"
deny:
- pkg: "skraak/cmd"
desc: "tools/calls must not import cmd"
- pkg: "skraak/tools"
desc: "tools/calls must not import parent package"
- pkg: "skraak/tui"
desc: "tools/calls must not import tui"
import:
files:
- "**/tools/import/*.go"
deny:
- pkg: "skraak/cmd"
desc: "tools/import must not import cmd"
- pkg: "skraak/tools"
desc: "tools/import must not import parent package"
- pkg: "skraak/tui"
desc: "tools/import must not import tui"
edit in .golangci.yml at line 106

[6.5137]

[6.5137]

- pkg: "skraak/tools/calls"
desc: "tools must not import tools/calls (sub-package)"
- pkg: "skraak/tools/import"
desc: "tools must not import tools/import (sub-package)"