quietlight / skraakCLI

{/} code [~] changes [>] discussions [*] jobs
calls_classify.go
package calls

import (
	"fmt"
	"math/rand"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"skraak/audio"
	"skraak/datafile"
)

// KeyBinding maps a key to a species/calltype for TUI classification.
// See classify_bindings.go for KeyBinding type and related methods.

// ClassifyConfig holds the configuration for classification.
//
// TUI-only fields (used only by the classify TUI, not CLI tools):
//   - Color: enable color output for spectrograms
//   - Sixel: use Sixel graphics protocol
//   - ITerm: use iTerm2 inline image protocol
//   - ImageSize: spectrogram display size in pixels
//   - Bindings: keybindings for species/calltype assignment
//   - SecondaryBindings: secondary keybindings for calltype after species
//   - BandpassLow/BandpassHigh: bandpass filter for audio playback
//   - Night/Day: time-of-day filters for file selection
//   - Lat/Lng/Timezone: location data for astronomical calculations
//
// These fields are ignored by CLI tools that use ClassifyConfig purely
// for file/segment filtering (Filter, Species, CallType, Certainty, Sample, Goto).
type ClassifyConfig struct {
	Folder    string // folder searched for .data files when File is empty
	File      string // single .data file to load; takes precedence over Folder
	Filter    string // segment filter passed to SegmentMatchesFilters, "" = no filter
	Species   string // scope to this species (optional)
	CallType  string // scope to this calltype within species (optional)
	Certainty int    // scope to this certainty value, -1 = no filter (optional)
	Sample    int    // random sample percentage 1-99, -1 = no sampling, 100 = no-op
	Goto      string // goto this file on startup (optional, basename match)
	Reviewer  string // not read in this file; presumably the reviewer name recorded with labels (TODO confirm)
	Color     bool
	ImageSize int // spectrogram display size in pixels (0 = default)
	Sixel     bool
	ITerm     bool
	Bindings  []KeyBinding
	// SecondaryBindings maps a primary binding key to per-species calltype
	// keys. Invoked via Shift+primary-key: the species is labeled without
	// advancing, and the next key is interpreted as a calltype.
	SecondaryBindings map[string]map[string]string
	BandpassLow       float64 // bandpass filter low frequency (Hz), 0 = no filter
	BandpassHigh      float64 // bandpass filter high frequency (Hz), 0 = no filter
	Night             bool    // keep only files whose IsNight result has SolarNight set
	Day               bool    // keep only files whose IsNight result has DiurnalActive set
	Lat               float64 // latitude passed to IsNight for the Night/Day filters
	Lng               float64 // longitude passed to IsNight for the Night/Day filters
	Timezone          string  // timezone passed to IsNight for the Night/Day filters
}

// ClassifyState holds the current state for the classify TUI: the loaded
// files, their cached filtered segments, and the cursor (FileIdx, SegmentIdx).
// LoadDataFiles builds it; Player and PlaybackSpeed are left at their zero
// values there and must be set by the caller.
type ClassifyState struct {
	Config            ClassifyConfig        // configuration the state was built from
	DataFiles         []*datafile.DataFile  // files kept after filtering and sampling
	filteredSegs      [][]*datafile.Segment // cached at load time, parallel to DataFiles
	totalSegs         int                   // pre-computed total segment count
	FileIdx           int                   // index of the current file in DataFiles
	SegmentIdx        int                   // index of the current segment in filteredSegs[FileIdx]
	Dirty             bool                  // Save writes the current file only when set, then clears it
	Player            *audio.AudioPlayer
	PlaybackSpeed     float64 // Current playback speed (1.0 = normal, 0.5 = half speed)
	TimeFilteredCount int     // files skipped by --night or --day filter
}

// findDataFilePaths resolves the list of .data file paths from config.
//
// When config.File is set it is returned as the only path, without checking
// that it exists. Otherwise config.Folder is searched via
// datafile.FindDataFiles, and any error from that search is wrapped.
func findDataFilePaths(config ClassifyConfig) ([]string, error) {
	if single := config.File; single != "" {
		return []string{single}, nil
	}

	found, err := datafile.FindDataFiles(config.Folder)
	if err == nil {
		return found, nil
	}
	return nil, fmt.Errorf("find data files: %w", err)
}

// filterDataFileSegments applies the segment filters and the --night/--day
// filter to a single data file.
//
// It returns the segments to review, whether the file should be kept, and the
// number of files (0 or 1) rejected by the time-of-day filter:
//   - no matching segments: (nil, false, 0); the time-of-day check is not
//     run, so the count only covers files that passed the segment filters
//   - rejected by filterByTimeOfDay: (nil, false, n), n as reported by it
//   - kept: (segments, true, 0)
func filterDataFileSegments(df *datafile.DataFile, config ClassifyConfig) ([]*datafile.Segment, bool, int) {
	segs := filterSegmentsByLabel(df.Segments, config)
	// Test the length rather than nil: without label filters the file's own
	// slice is returned as-is and may be empty without being nil.
	if len(segs) == 0 {
		return nil, false, 0
	}

	if config.Night || config.Day {
		if keep, tf := filterByTimeOfDay(df.FilePath, config); !keep {
			return nil, false, tf
		}
	}
	return segs, true, 0
}

// filterSegmentsByLabel narrows segments to those matching the label, species,
// calltype and certainty settings in config.
//
// When none of Filter, Species or Certainty (>= 0) is active, the input slice
// is returned unchanged. Otherwise the result holds only segments accepted by
// SegmentMatchesFilters, and is nil when nothing matched (the caller should
// then skip the file).
func filterSegmentsByLabel(segments []*datafile.Segment, config ClassifyConfig) []*datafile.Segment {
	if config.Filter == "" && config.Species == "" && config.Certainty < 0 {
		return segments
	}

	var matched []*datafile.Segment
	for i := range segments {
		candidate := segments[i]
		if !candidate.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
			continue
		}
		matched = append(matched, candidate)
	}
	return matched // stays nil when nothing matched
}

// filterByTimeOfDay applies the --night/--day filter to one .data file.
//
// The audio path is derived by stripping the ".data" suffix from dataFilePath
// and is passed to IsNight together with the configured location. It returns
// (keep, timeFilteredCount): (true, 0) when the file passes, (false, 1) when
// it is rejected. A file is rejected when IsNight fails (a warning is written
// to stderr), when config.Night is set and the result is not SolarNight, or
// when config.Day is set and the result is not DiurnalActive.
func filterByTimeOfDay(dataFilePath string, config ClassifyConfig) (bool, int) {
	wavPath := filepath.Clean(strings.TrimSuffix(dataFilePath, ".data"))
	query := IsNightInput{
		FilePath: wavPath,
		Lat:      config.Lat,
		Lng:      config.Lng,
		Timezone: config.Timezone,
	}

	result, err := IsNight(query)
	switch {
	case err != nil:
		fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
		return false, 1
	case config.Night && !result.SolarNight,
		config.Day && !result.DiurnalActive:
		return false, 1
	}
	return true, 0
}

// LoadDataFiles loads the .data files selected by config and builds the
// ClassifyState used for classification.
//
// Files are parsed and sorted, then filtered by segment and time-of-day
// settings. When config.Sample is between 1 and 99 a random subset of the
// remaining segments is taken, seeded from the current time. Errors from
// parsing or from --goto positioning are returned as-is.
func LoadDataFiles(config ClassifyConfig) (*ClassifyState, error) {
	parsed, err := parseAndSortDataFiles(config)
	if err != nil {
		return nil, err
	}

	files, segsPerFile, skippedByTime := filterDataFiles(parsed, config)

	if pct := config.Sample; pct > 0 && pct < 100 {
		seed := time.Now().UnixNano()
		files, segsPerFile = applySampling(files, segsPerFile, pct, rand.New(rand.NewSource(seed)))
	}

	return buildClassifyState(config, files, segsPerFile, skippedByTime)
}

// parseAndSortDataFiles finds, parses, and sorts .data files from the config.
//
// Paths come from findDataFilePaths. A file that fails to parse is skipped
// with a warning on stderr rather than aborting the load, so one bad file
// does not block the others. The result is sorted by FilePath. It returns an
// error if no paths were found or if none of them parsed.
func parseAndSortDataFiles(config ClassifyConfig) ([]*datafile.DataFile, error) {
	filePaths, err := findDataFilePaths(config)
	if err != nil {
		return nil, err
	}
	if len(filePaths) == 0 {
		return nil, fmt.Errorf("no .data files found")
	}

	dataFiles := make([]*datafile.DataFile, 0, len(filePaths))
	for _, path := range filePaths {
		df, err := datafile.ParseDataFile(path)
		if err != nil {
			// Best-effort: keep going, but tell the user why the file is missing.
			fmt.Fprintf(os.Stderr, "warning: skipping %s (parse error: %v)\n", path, err)
			continue
		}
		dataFiles = append(dataFiles, df)
	}
	if len(dataFiles) == 0 {
		return nil, fmt.Errorf("no valid .data files")
	}

	sort.Slice(dataFiles, func(i, j int) bool {
		return dataFiles[i].FilePath < dataFiles[j].FilePath
	})

	return dataFiles, nil
}

// filterDataFiles runs filterDataFileSegments over every data file.
//
// It returns the files that were kept, their filtered segments as a parallel
// slice in the same order, and the sum of the time-filtered counts reported
// for all files (kept or not).
func filterDataFiles(dataFiles []*datafile.DataFile, config ClassifyConfig) ([]*datafile.DataFile, [][]*datafile.Segment, int) {
	var (
		files         []*datafile.DataFile
		segsPerFile   [][]*datafile.Segment
		skippedByTime int
	)

	for i := range dataFiles {
		matched, ok, skipped := filterDataFileSegments(dataFiles[i], config)
		skippedByTime += skipped
		if ok {
			files = append(files, dataFiles[i])
			segsPerFile = append(segsPerFile, matched)
		}
	}
	return files, segsPerFile, skippedByTime
}

// buildClassifyState assembles the ClassifyState for the kept files and, when
// config.Goto is set, positions the cursor on the requested file.
//
// dataFiles and filteredSegs are parallel slices; the total segment count is
// computed once here. Goto is compared against the base name of each
// DataFile.FilePath and the first match wins. If Goto is set but no kept file
// matches, it returns an error and no state.
func buildClassifyState(config ClassifyConfig, dataFiles []*datafile.DataFile, filteredSegs [][]*datafile.Segment, timeFiltered int) (*ClassifyState, error) {
	total := 0
	for _, segs := range filteredSegs {
		total += len(segs)
	}

	state := &ClassifyState{
		Config:            config,
		DataFiles:         dataFiles,
		filteredSegs:      filteredSegs,
		totalSegs:         total,
		TimeFilteredCount: timeFiltered,
	}

	if config.Goto == "" {
		return state, nil
	}

	for i, df := range state.DataFiles {
		// filepath.Base honours the OS path separator, unlike a manual split
		// on "/".
		if filepath.Base(df.FilePath) == config.Goto {
			state.FileIdx = i
			return state, nil
		}
	}
	return nil, fmt.Errorf("goto file not found (or has no matching segments): %s", config.Goto)
}

// applySampling randomly selects sample% of segments from the filtered set.
//
// kept and cachedSegs are parallel slices. The number of segments selected is
// len(all segments)*sample/100 (integer division), clamped to at least 1 and
// at most the number available. Selection shuffles with rng, so a fixed seed
// gives a reproducible sample. The returned files and segments preserve the
// original file and segment order; files left with no selected segment are
// dropped. If there are no segments at all, it returns (nil, nil).
func applySampling(kept []*datafile.DataFile, cachedSegs [][]*datafile.Segment, sample int, rng *rand.Rand) ([]*datafile.DataFile, [][]*datafile.Segment) {
	type segRef struct{ fileIdx, segIdx int }

	total := 0
	for _, segs := range cachedSegs {
		total += len(segs)
	}
	// Nothing to sample from: without this guard the "at least 1" rule below
	// would slice past the end of an empty list and panic.
	if total == 0 {
		return nil, nil
	}

	flat := make([]segRef, 0, total)
	for fi, segs := range cachedSegs {
		for si := range segs {
			flat = append(flat, segRef{fi, si})
		}
	}

	// At least one segment, never more than exist (guards sample > 100).
	targetCount := min(max(len(flat)*sample/100, 1), len(flat))

	rng.Shuffle(len(flat), func(i, j int) { flat[i], flat[j] = flat[j], flat[i] })
	selected := flat[:targetCount]

	// Restore the original order before rebuilding.
	sort.Slice(selected, func(i, j int) bool {
		if selected[i].fileIdx != selected[j].fileIdx {
			return selected[i].fileIdx < selected[j].fileIdx
		}
		return selected[i].segIdx < selected[j].segIdx
	})

	newCached := make([][]*datafile.Segment, len(cachedSegs))
	for _, ref := range selected {
		newCached[ref.fileIdx] = append(newCached[ref.fileIdx], cachedSegs[ref.fileIdx][ref.segIdx])
	}

	// Drop files that ended up with no sampled segments.
	var newKept []*datafile.DataFile
	var finalCached [][]*datafile.Segment
	for i, segs := range newCached {
		if len(segs) > 0 {
			newKept = append(newKept, kept[i])
			finalCached = append(finalCached, segs)
		}
	}
	return newKept, finalCached
}

// FilteredSegs returns the cached filtered segments, one slice per entry of
// DataFiles and in the same order. The internal slice itself is returned, not
// a copy.
func (s *ClassifyState) FilteredSegs() [][]*datafile.Segment {
	return s.filteredSegs
}

// CurrentFile returns the data file at FileIdx, or nil when FileIdx is at or
// past the end of DataFiles.
func (s *ClassifyState) CurrentFile() *datafile.DataFile {
	if s.FileIdx < len(s.DataFiles) {
		return s.DataFiles[s.FileIdx]
	}
	return nil
}

// CurrentSegment returns the segment at (FileIdx, SegmentIdx) within the
// cached filtered segments, or nil when either index is at or past the end.
func (s *ClassifyState) CurrentSegment() *datafile.Segment {
	if s.FileIdx < len(s.filteredSegs) {
		if fileSegs := s.filteredSegs[s.FileIdx]; s.SegmentIdx < len(fileSegs) {
			return fileSegs[s.SegmentIdx]
		}
	}
	return nil
}

// TotalSegments returns the total number of segments to review. The value is
// the count stored when the state was built; it is not recomputed here.
func (s *ClassifyState) TotalSegments() int {
	return s.totalSegs
}

// CurrentSegmentNumber returns the 1-based position of the current segment
// across all files: the segments in every file before FileIdx, plus the
// position within the current file.
func (s *ClassifyState) CurrentSegmentNumber() int {
	number := s.SegmentIdx + 1
	for fileIdx := 0; fileIdx < s.FileIdx; fileIdx++ {
		number += len(s.filteredSegs[fileIdx])
	}
	return number
}

// NextSegment advances the cursor by one segment. It steps within the current
// file when possible, otherwise moves to the first segment of the next file.
// It returns false, leaving the cursor unchanged, when already at the end.
func (s *ClassifyState) NextSegment() bool {
	switch {
	case s.FileIdx >= len(s.filteredSegs):
		// Cursor is already past the last file.
		return false
	case s.SegmentIdx+1 < len(s.filteredSegs[s.FileIdx]):
		s.SegmentIdx++
		return true
	case s.FileIdx+1 < len(s.DataFiles):
		// Current file exhausted: start at the top of the next one.
		s.FileIdx++
		s.SegmentIdx = 0
		return true
	default:
		return false
	}
}

// PrevSegment moves the cursor back by one segment. It steps within the
// current file when possible, otherwise moves to the last segment of the
// previous file (index 0 if that file has no segments). It returns false,
// leaving the cursor unchanged, when already at the very start.
func (s *ClassifyState) PrevSegment() bool {
	switch {
	case s.SegmentIdx > 0:
		s.SegmentIdx--
	case s.FileIdx > 0:
		s.FileIdx--
		s.SegmentIdx = max(len(s.filteredSegs[s.FileIdx])-1, 0)
	default:
		return false
	}
	return true
}

// Save writes the current data file back to its own FilePath and clears the
// Dirty flag. It does nothing and returns nil when there is no current file
// or the state is not dirty. On a write error the error is returned and
// Dirty stays set.
func (s *ClassifyState) Save() error {
	df := s.CurrentFile()
	if df == nil || !s.Dirty {
		return nil
	}

	if err := df.Write(df.FilePath); err != nil {
		return err
	}
	s.Dirty = false
	return nil
}
Fork channel

Rename channel

Delete channel