calls_classify.go
package calls
import (
"fmt"
"math/rand"
"os"
"path/filepath"
"sort"
"strings"
"time"
"skraak/audio"
"skraak/datafile"
)
// Note: the KeyBinding type, which maps a key to a species/calltype for TUI
// classification, and its related methods are defined in classify_bindings.go.
// ClassifyConfig holds the configuration for classification.
//
// It selects which .data files are loaded (Folder or File), which segments
// of those files are kept (Filter, Species, CallType, Certainty, Night/Day,
// Sample), and where navigation starts (Goto).
//
// TUI-only fields (used only by the classify TUI, not CLI tools):
// - Color: enable color output for spectrograms
// - Sixel: use Sixel graphics protocol
// - ITerm: use iTerm2 inline image protocol
// - ImageSize: spectrogram display size in pixels
// - Bindings: keybindings for species/calltype assignment
// - SecondaryBindings: secondary keybindings for calltype after species
// - BandpassLow/BandpassHigh: bandpass filter for audio playback
// - Night/Day: time-of-day filters for file selection
// - Lat/Lng/Timezone: location data for astronomical calculations
//
// These fields are ignored by CLI tools that use ClassifyConfig purely
// for file/segment filtering (Filter, Species, CallType, Certainty, Sample, Goto).
type ClassifyConfig struct {
Folder string // folder searched for .data files when File is empty
File string // single .data file to load; when set, Folder is not searched
Filter string // segment filter value handed to SegmentMatchesFilters (optional)
Species string // scope to this species (optional)
CallType string // scope to this calltype within species (optional)
Certainty int // scope to this certainty value, -1 = no filter (optional)
Sample int // random sample percentage 1-99, -1 = no sampling, 100 = no-op
Goto string // goto this file on startup (optional, basename match)
Reviewer string // NOTE(review): not read in this file; presumably the reviewer name recorded with labels, confirm against the TUI
Color bool // enable color output for spectrograms
ImageSize int // spectrogram display size in pixels (0 = default)
Sixel bool // use Sixel graphics protocol
ITerm bool // use iTerm2 inline image protocol
Bindings []KeyBinding // keybindings for species/calltype assignment
// SecondaryBindings maps a primary binding key to per-species calltype
// keys. Invoked via Shift+primary-key: the species is labeled without
// advancing, and the next key is interpreted as a calltype.
SecondaryBindings map[string]map[string]string
BandpassLow float64 // bandpass filter low frequency (Hz), 0 = no filter
BandpassHigh float64 // bandpass filter high frequency (Hz), 0 = no filter
Night bool // keep only files for which IsNight reports SolarNight
Day bool // keep only files for which IsNight reports DiurnalActive
Lat float64 // latitude passed to IsNight for the night/day decision
Lng float64 // longitude passed to IsNight for the night/day decision
Timezone string // timezone passed to IsNight for the night/day decision
}
// ClassifyState holds the current state for TUI: the loaded data files, the
// filtered segments cached for each of them, and the navigation position
// (FileIdx, SegmentIdx) within that set.
type ClassifyState struct {
Config ClassifyConfig // configuration the state was built from
DataFiles []*datafile.DataFile // files kept after filtering (and sampling, when enabled)
filteredSegs [][]*datafile.Segment // cached at load time, parallel to DataFiles
totalSegs int // pre-computed total segment count
FileIdx int // index of the current file in DataFiles / filteredSegs
SegmentIdx int // index of the current segment within filteredSegs[FileIdx]
Dirty bool // Save writes the current file only when this is true, then resets it to false
Player *audio.AudioPlayer // NOTE(review): not used in this file; presumably the TUI's audio playback handle
PlaybackSpeed float64 // Current playback speed (1.0 = normal, 0.5 = half speed)
TimeFilteredCount int // files skipped by --night or --day filter
}
// findDataFilePaths resolves the list of .data file paths from config.
// It is the first step of the loading pipeline driven by LoadDataFiles.
//
// When config.File is set, that single path is returned as-is and
// config.Folder is not consulted. Otherwise the folder is searched via
// datafile.FindDataFiles, and any search error is wrapped and returned.
func findDataFilePaths(config ClassifyConfig) ([]string, error) {
	if single := config.File; single != "" {
		return []string{single}, nil
	}

	found, err := datafile.FindDataFiles(config.Folder)
	if err != nil {
		return nil, fmt.Errorf("find data files: %w", err)
	}
	return found, nil
}
// filterDataFileSegments applies segment and day/night filters to a single data file.
//
// It returns three values:
//   - the segments that passed the label filters (nil when the file is dropped),
//   - whether the file should be kept,
//   - how many files the time-of-day filter dropped (0 or 1).
//
// A file is dropped when it has no matching segments or when it fails the
// --night/--day check. The time-of-day check only runs for files that still
// have segments after label filtering, so the count covers those files only.
func filterDataFileSegments(df *datafile.DataFile, config ClassifyConfig) ([]*datafile.Segment, bool, int) {
	segs := filterSegmentsByLabel(df.Segments, config)
	// Test the length rather than nil-ness: with no label filter active the
	// file's own slice is returned, and it may be empty without being nil.
	// A file with nothing to review must not be kept.
	if len(segs) == 0 {
		return nil, false, 0
	}

	if config.Night || config.Day {
		if keep, tf := filterByTimeOfDay(df.FilePath, config); !keep {
			return nil, false, tf
		}
	}
	return segs, true, 0
}
// filterSegmentsByLabel narrows segments to those matching the label, species,
// calltype and certainty settings in config.
//
// When none of Filter, Species or Certainty (>= 0) is set, the input slice is
// returned unchanged. Otherwise only segments accepted by
// SegmentMatchesFilters are returned; the result is nil when nothing matches,
// which callers treat as "skip this file".
func filterSegmentsByLabel(segments []*datafile.Segment, config ClassifyConfig) []*datafile.Segment {
	if config.Filter == "" && config.Species == "" && config.Certainty < 0 {
		return segments
	}

	var matched []*datafile.Segment
	for i := range segments {
		candidate := segments[i]
		if !candidate.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
			continue
		}
		matched = append(matched, candidate)
	}
	return matched
}
// filterByTimeOfDay applies the --night/--day time-of-day filter to one .data file.
//
// The path handed to IsNight is dataFilePath with its ".data" suffix removed
// and then cleaned. The function returns (keep, timeFilteredCount): (true, 0)
// when the file passes, and (false, 1) when it is rejected, either because
// IsNight failed (a warning is written to stderr) or because the result does
// not satisfy the requested Night/Day setting.
func filterByTimeOfDay(dataFilePath string, config ClassifyConfig) (bool, int) {
	wavPath := filepath.Clean(strings.TrimSuffix(dataFilePath, ".data"))

	input := IsNightInput{
		FilePath: wavPath,
		Lat:      config.Lat,
		Lng:      config.Lng,
		Timezone: config.Timezone,
	}
	result, err := IsNight(input)

	// Cases are evaluated in order, so result is only inspected once the
	// error case has been ruled out.
	switch {
	case err != nil:
		fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
		return false, 1
	case config.Night && !result.SolarNight:
		return false, 1
	case config.Day && !result.DiurnalActive:
		return false, 1
	}
	return true, 0
}
// LoadDataFiles loads all .data files for classification.
//
// It finds, parses and sorts the files selected by config, applies the
// segment and time-of-day filters, optionally reduces the result to a random
// sample (only when config.Sample is between 1 and 99 inclusive), and builds
// the ClassifyState. The sampling generator is seeded from the current time,
// so repeated runs select different segments.
func LoadDataFiles(config ClassifyConfig) (*ClassifyState, error) {
	parsed, err := parseAndSortDataFiles(config)
	if err != nil {
		return nil, err
	}

	files, segsPerFile, skippedByTime := filterDataFiles(parsed, config)

	if wantSample := config.Sample > 0 && config.Sample < 100; wantSample {
		seed := time.Now().UnixNano()
		files, segsPerFile = applySampling(files, segsPerFile, config.Sample, rand.New(rand.NewSource(seed)))
	}

	return buildClassifyState(config, files, segsPerFile, skippedByTime)
}
// parseAndSortDataFiles finds, parses, and sorts .data files from the config.
//
// Files that fail to parse are skipped with a warning on stderr, matching how
// filterByTimeOfDay reports files it skips, so a missing file is never
// dropped without explanation. An error is returned when no paths are found
// or when none of them parse. The result is sorted by FilePath.
func parseAndSortDataFiles(config ClassifyConfig) ([]*datafile.DataFile, error) {
	filePaths, err := findDataFilePaths(config)
	if err != nil {
		return nil, err
	}
	if len(filePaths) == 0 {
		return nil, fmt.Errorf("no .data files found")
	}

	dataFiles := make([]*datafile.DataFile, 0, len(filePaths))
	for _, path := range filePaths {
		df, err := datafile.ParseDataFile(path)
		if err != nil {
			// Best-effort load: keep going, but tell the user which file was dropped.
			fmt.Fprintf(os.Stderr, "warning: skipping %s (parse error: %v)\n", path, err)
			continue
		}
		dataFiles = append(dataFiles, df)
	}
	if len(dataFiles) == 0 {
		return nil, fmt.Errorf("no valid .data files")
	}

	sort.Slice(dataFiles, func(i, j int) bool {
		return dataFiles[i].FilePath < dataFiles[j].FilePath
	})
	return dataFiles, nil
}
// filterDataFiles runs filterDataFileSegments over every data file.
//
// It returns the files that were kept, the filtered segments of each kept
// file (parallel to the kept files), and the summed time-of-day skip count
// reported for all files, kept or not.
func filterDataFiles(dataFiles []*datafile.DataFile, config ClassifyConfig) ([]*datafile.DataFile, [][]*datafile.Segment, int) {
	var (
		keptFiles     []*datafile.DataFile
		keptSegs      [][]*datafile.Segment
		skippedByTime int
	)

	for i := range dataFiles {
		file := dataFiles[i]
		matched, ok, skipped := filterDataFileSegments(file, config)
		skippedByTime += skipped
		if ok {
			keptFiles = append(keptFiles, file)
			keptSegs = append(keptSegs, matched)
		}
	}
	return keptFiles, keptSegs, skippedByTime
}
// buildClassifyState constructs the ClassifyState, handling --goto file positioning.
//
// The total segment count is computed once from filteredSegs and stored in
// the state. When config.Goto is empty the state starts at the first file.
// Otherwise FileIdx is set to the first data file whose base name equals
// config.Goto; if no kept file matches, an error is returned instead of a
// state.
func buildClassifyState(config ClassifyConfig, dataFiles []*datafile.DataFile, filteredSegs [][]*datafile.Segment, timeFiltered int) (*ClassifyState, error) {
	total := 0
	for _, segs := range filteredSegs {
		total += len(segs)
	}

	state := &ClassifyState{
		Config:            config,
		DataFiles:         dataFiles,
		filteredSegs:      filteredSegs,
		totalSegs:         total,
		TimeFilteredCount: timeFiltered,
	}
	if config.Goto == "" {
		return state, nil
	}

	for i, df := range state.DataFiles {
		// filepath.Base honours the platform's path separator, unlike a
		// manual search for "/".
		if filepath.Base(df.FilePath) == config.Goto {
			state.FileIdx = i
			return state, nil
		}
	}
	return nil, fmt.Errorf("goto file not found (or has no matching segments): %s", config.Goto)
}
// applySampling randomly selects sample% of segments from the filtered set.
//
// kept and cachedSegs are parallel: cachedSegs[i] holds the segments of
// kept[i]. At least one segment is selected whenever any exist, and never
// more than exist. The returned files and segments preserve the original
// order (file index, then segment index); files left with no selected
// segment are dropped. When there are no segments at all, (nil, nil) is
// returned.
func applySampling(kept []*datafile.DataFile, cachedSegs [][]*datafile.Segment, sample int, rng *rand.Rand) ([]*datafile.DataFile, [][]*datafile.Segment) {
	type segRef struct{ fileIdx, segIdx int }

	total := 0
	for _, segs := range cachedSegs {
		total += len(segs)
	}
	// Nothing to sample from: without this guard the "at least one" rule
	// below would slice past the end of an empty list and panic.
	if total == 0 {
		return nil, nil
	}

	flat := make([]segRef, 0, total)
	for fi, segs := range cachedSegs {
		for si := range segs {
			flat = append(flat, segRef{fi, si})
		}
	}

	// At least one segment, and never more than exist (guards sample > 100).
	targetCount := min(max(total*sample/100, 1), total)
	rng.Shuffle(len(flat), func(i, j int) { flat[i], flat[j] = flat[j], flat[i] })
	selected := flat[:targetCount]

	// Restore the original order before rebuilding.
	sort.Slice(selected, func(i, j int) bool {
		if selected[i].fileIdx != selected[j].fileIdx {
			return selected[i].fileIdx < selected[j].fileIdx
		}
		return selected[i].segIdx < selected[j].segIdx
	})

	newCached := make([][]*datafile.Segment, len(cachedSegs))
	for _, ref := range selected {
		newCached[ref.fileIdx] = append(newCached[ref.fileIdx], cachedSegs[ref.fileIdx][ref.segIdx])
	}

	var newKept []*datafile.DataFile
	var finalCached [][]*datafile.Segment
	for i, segs := range newCached {
		if len(segs) > 0 {
			newKept = append(newKept, kept[i])
			finalCached = append(finalCached, segs)
		}
	}
	return newKept, finalCached
}
// FilteredSegs returns the cached filtered segments parallel to DataFiles:
// element i holds the segments kept for DataFiles[i]. The internal slice is
// returned directly, not a copy.
func (s *ClassifyState) FilteredSegs() [][]*datafile.Segment {
return s.filteredSegs
}
// CurrentFile returns the data file at FileIdx, or nil when FileIdx is
// outside the bounds of DataFiles (including a negative value, since FileIdx
// is an exported field that callers may set).
func (s *ClassifyState) CurrentFile() *datafile.DataFile {
	if s.FileIdx < 0 || s.FileIdx >= len(s.DataFiles) {
		return nil
	}
	return s.DataFiles[s.FileIdx]
}
// CurrentSegment returns the segment at (FileIdx, SegmentIdx) within the
// cached filtered segments, or nil when either index is out of range
// (including negative values, since both indices are exported fields that
// callers may set).
func (s *ClassifyState) CurrentSegment() *datafile.Segment {
	if s.FileIdx < 0 || s.FileIdx >= len(s.filteredSegs) {
		return nil
	}
	segs := s.filteredSegs[s.FileIdx]
	if s.SegmentIdx < 0 || s.SegmentIdx >= len(segs) {
		return nil
	}
	return segs[s.SegmentIdx]
}
// TotalSegments returns total segments to review. The value is the count
// stored in the state when it was built; it is not recomputed on each call.
func (s *ClassifyState) TotalSegments() int {
return s.totalSegs
}
// CurrentSegmentNumber returns the 1-based position of the current segment
// across all files: the number of filtered segments in every file before
// FileIdx, plus SegmentIdx, plus one.
func (s *ClassifyState) CurrentSegmentNumber() int {
	position := s.SegmentIdx + 1
	for fileIdx := 0; fileIdx < s.FileIdx; fileIdx++ {
		position += len(s.filteredSegs[fileIdx])
	}
	return position
}
// NextSegment advances to the following segment and reports whether it moved.
//
// Within a file it increments SegmentIdx. At the last segment of a file it
// steps to the next file and resets SegmentIdx to 0. It returns false,
// leaving the position untouched, when FileIdx is already past the cached
// segments or when there is no further file.
func (s *ClassifyState) NextSegment() bool {
	if s.FileIdx >= len(s.filteredSegs) {
		return false
	}

	switch {
	case s.SegmentIdx+1 < len(s.filteredSegs[s.FileIdx]):
		s.SegmentIdx++
	case s.FileIdx+1 < len(s.DataFiles):
		// Current file exhausted: start at the top of the next one.
		s.FileIdx++
		s.SegmentIdx = 0
	default:
		return false
	}
	return true
}
// PrevSegment steps back to the preceding segment and reports whether it moved.
//
// Within a file it decrements SegmentIdx. At the first segment of a file it
// moves to the previous file and lands on that file's last segment (index 0
// when that file has no segments). It returns false at the very start.
func (s *ClassifyState) PrevSegment() bool {
	switch {
	case s.SegmentIdx > 0:
		s.SegmentIdx--
	case s.FileIdx > 0:
		s.FileIdx--
		last := len(s.filteredSegs[s.FileIdx]) - 1
		s.SegmentIdx = max(last, 0)
	default:
		return false
	}
	return true
}
// Save writes the current file back to its own FilePath when there are
// unsaved changes.
//
// It does nothing and returns nil when there is no current file or when
// Dirty is false. On a successful write Dirty is cleared; on failure the
// write error is returned and Dirty is left set.
func (s *ClassifyState) Save() error {
	current := s.CurrentFile()
	if current == nil || !s.Dirty {
		return nil
	}

	if err := current.Write(current.FilePath); err != nil {
		return err
	}
	s.Dirty = false
	return nil
}