consolidated shared types from calls_from_preds.go into calls_from_common.go
Dependencies
- [2] 3DVPQOKB big tidy up of tools/
- [3] XU7FTYK3 third phase of utils refactor, wav/
- [4] V2HX6HEB claude going nuts all over the place
- [*] KZKLAINJ run out of space on nest, cleaned out
Change contents
- edit in tools/calls/calls_from_preds.go at line 28
// ClusteredCall represents a clustered bird call detectiontype ClusteredCall struct {File string `json:"file"`StartTime float64 `json:"start_time"`EndTime float64 `json:"end_time"`EbirdCode string `json:"ebird_code"`Segments int `json:"segments"`} - edit in tools/calls/calls_from_preds.go at line 55
}// AviaNZ .data file types// predFileSpeciesKey groups detections by file and ebird codetype predFileSpeciesKey struct {File stringEbirdCode string - edit in tools/calls/calls_from_preds.go at line 228
}}}// clusterDetections groups detections into clusters and produces sorted ClusteredCallsfunc clusterDetections(detections map[predFileSpeciesKey][]float64, clipDuration, gapThreshold float64, minDetections int) ([]ClusteredCall, map[string]int) {var allCalls []ClusteredCallspeciesCount := make(map[string]int)for key, startTimes := range detections {sort.Float64s(startTimes)clusters := clusterStartTimes(startTimes, gapThreshold)for _, cluster := range clusters {if len(cluster) <= minDetections {continue}call := ClusteredCall{File: key.File,StartTime: cluster[0],EndTime: cluster[len(cluster)-1] + clipDuration,EbirdCode: key.EbirdCode,Segments: len(cluster),}allCalls = append(allCalls, call)speciesCount[key.EbirdCode]++ - edit in tools/calls/calls_from_preds.go at line 230
sort.Slice(allCalls, func(i, j int) bool {if allCalls[i].File != allCalls[j].File {return allCalls[i].File < allCalls[j].File}return allCalls[i].StartTime < allCalls[j].StartTime})return allCalls, speciesCount - edit in tools/calls/calls_from_preds.go at line 232
// DirCache caches directory entries for fast WAV file lookup.// Scans the directory once and builds a map from lowercased basename to full filename.// Safe for concurrent read-only use after construction.type DirCache struct {dir stringwavMap map[string]string // lowercase basename -> filename with original case (e.g. "20230610_150000" -> "20230610_150000.WAV")dirMap map[string]string // lowercase basename -> filename for any file (used by from-raven for .selections.txt etc.)}// NewDirCache creates a DirCache by scanning the directory once.func NewDirCache(dir string) *DirCache {entries, err := os.ReadDir(dir)if err != nil {return &DirCache{dir: dir, wavMap: make(map[string]string), dirMap: make(map[string]string)}}wavMap := make(map[string]string, len(entries))dirMap := make(map[string]string, len(entries))for _, entry := range entries {if entry.IsDir() {continue}name := entry.Name()ext := filepath.Ext(name)base := strings.TrimSuffix(name, ext)dirMap[strings.ToLower(base)] = nameif strings.EqualFold(ext, ".wav") {wavMap[strings.ToLower(base)] = name}}return &DirCache{dir: dir, wavMap: wavMap, dirMap: dirMap}}// FindWAV looks up a WAV file by basename (case-insensitive).// Returns the full path with correct case, or empty string if not found.func (dc *DirCache) FindWAV(baseName string) string {if name, ok := dc.wavMap[strings.ToLower(baseName)]; ok {return filepath.Join(dc.dir, name)}return ""}// FindFile looks up any file by basename (case-insensitive).// Returns the full path with correct case, or empty string if not found.func (dc *DirCache) FindFile(baseName string) string {if name, ok := dc.dirMap[strings.ToLower(baseName)]; ok {return filepath.Join(dc.dir, name)}return ""}// findWAVFile finds a WAV file in the directory with case-insensitive matching.// baseName is the filename without extension (e.g., "20230610_150000").// Returns the full path with correct case, or empty string if not found.// Deprecated: Use DirCache.FindWAV for batch operations to avoid 
repeated directory scans.func findWAVFile(dir, baseName string) string {entries, err := os.ReadDir(dir)if err != nil {return ""}for _, entry := range entries {if entry.IsDir() {continue}name := entry.Name()ext := filepath.Ext(name)nameNoExt := strings.TrimSuffix(name, ext)if nameNoExt == baseName && strings.EqualFold(ext, ".wav") {return filepath.Join(dir, name)}}return ""} - edit in tools/calls/calls_from_preds.go at line 547
// ParseFilterFromFilename extracts filter name from preds CSV filename// "predsST_opensoundscape-kiwi-1.2_2025-11-12.csv" -> "opensoundscape-kiwi-1.2"// Returns empty string if parsing failsfunc ParseFilterFromFilename(csvPath string) string {filename := filepath.Base(csvPath)// Remove .csv extensionname := strings.TrimSuffix(filename, ".csv")// Split on underscoreparts := strings.Split(name, "_")if len(parts) == 3 {return parts[1]}return ""}// clusterStartTimes groups consecutive start times into clusters// where the gap between consecutive times is <= gapThresholdfunc clusterStartTimes(startTimes []float64, gapThreshold float64) [][]float64 {if len(startTimes) == 0 {return nil}var clusters [][]float64currentCluster := []float64{startTimes[0]}for i := 1; i < len(startTimes); i++ {gap := startTimes[i] - startTimes[i-1]if gap <= gapThreshold {// Same clustercurrentCluster = append(currentCluster, startTimes[i])} else {// New clusterclusters = append(clusters, currentCluster)currentCluster = []float64{startTimes[i]}}}// Don't forget the last clusterclusters = append(clusters, currentCluster)return clusters} - edit in tools/calls/calls_from_common.go at line 8
"strings" - edit in tools/calls/calls_from_common.go at line 286
}}}// ClusteredCall represents a clustered bird call detection.// Shared by all call source implementations (preds, birda, raven).type ClusteredCall struct {File string `json:"file"`StartTime float64 `json:"start_time"`EndTime float64 `json:"end_time"`EbirdCode string `json:"ebird_code"`Segments int `json:"segments"`}// DirCache caches directory entries for fast WAV file lookup.// Scans the directory once and builds a map from lowercased basename to full filename.// Safe for concurrent read-only use after construction.type DirCache struct {dir stringwavMap map[string]string // lowercase basename -> filename with original case (e.g. "20230610_150000" -> "20230610_150000.WAV")dirMap map[string]string // lowercase basename -> filename for any file (used by from-raven for .selections.txt etc.)}// NewDirCache creates a DirCache by scanning the directory once.func NewDirCache(dir string) *DirCache {entries, err := os.ReadDir(dir)if err != nil {return &DirCache{dir: dir, wavMap: make(map[string]string), dirMap: make(map[string]string)}}wavMap := make(map[string]string, len(entries))dirMap := make(map[string]string, len(entries))for _, entry := range entries {if entry.IsDir() {continue}name := entry.Name()ext := filepath.Ext(name)base := strings.TrimSuffix(name, ext)dirMap[strings.ToLower(base)] = nameif strings.EqualFold(ext, ".wav") {wavMap[strings.ToLower(base)] = name}}return &DirCache{dir: dir, wavMap: wavMap, dirMap: dirMap}}// FindWAV looks up a WAV file by basename (case-insensitive).// Returns the full path with correct case, or empty string if not found.func (dc *DirCache) FindWAV(baseName string) string {if name, ok := dc.wavMap[strings.ToLower(baseName)]; ok {return filepath.Join(dc.dir, name)}return ""}// FindFile looks up any file by basename (case-insensitive).// Returns the full path with correct case, or empty string if not found.func (dc *DirCache) FindFile(baseName string) string {if name, ok := dc.dirMap[strings.ToLower(baseName)]; ok {return 
filepath.Join(dc.dir, name)}return ""}// findWAVFile finds a WAV file in the directory with case-insensitive matching.// baseName is the filename without extension (e.g., "20230610_150000").// Returns the full path with correct case, or empty string if not found.// Deprecated: Use DirCache.FindWAV for batch operations to avoid repeated directory scans.func findWAVFile(dir, baseName string) string {entries, err := os.ReadDir(dir)if err != nil {return ""}for _, entry := range entries {if entry.IsDir() {continue}name := entry.Name()ext := filepath.Ext(name)nameNoExt := strings.TrimSuffix(name, ext)if nameNoExt == baseName && strings.EqualFold(ext, ".wav") {return filepath.Join(dir, name)}}return ""}// ParseFilterFromFilename extracts filter name from preds CSV filename.// "predsST_opensoundscape-kiwi-1.2_2025-11-12.csv" -> "opensoundscape-kiwi-1.2"// Returns empty string if parsing fails.func ParseFilterFromFilename(csvPath string) string {filename := filepath.Base(csvPath)// Remove .csv extensionname := strings.TrimSuffix(filename, ".csv")// Split on underscoreparts := strings.Split(name, "_")if len(parts) == 3 {return parts[1]}return ""}// clusterStartTimes groups consecutive start times into clusters// where the gap between consecutive times is <= gapThreshold.func clusterStartTimes(startTimes []float64, gapThreshold float64) [][]float64 {if len(startTimes) == 0 {return nil}var clusters [][]float64currentCluster := []float64{startTimes[0]}for i := 1; i < len(startTimes); i++ {gap := startTimes[i] - startTimes[i-1]if gap <= gapThreshold {// Same clustercurrentCluster = append(currentCluster, startTimes[i])} else {// New clusterclusters = append(clusters, currentCluster)currentCluster = []float64{startTimes[i]}}}// Don't forget the last clusterclusters = append(clusters, currentCluster)return clusters}// predFileSpeciesKey groups detections by file and ebird code.// Used by clusterDetections in calls_from_preds.go.type predFileSpeciesKey struct {File stringEbirdCode 
string}// clusterDetections groups detections into clusters and produces sorted ClusteredCalls.func clusterDetections(detections map[predFileSpeciesKey][]float64, clipDuration, gapThreshold float64, minDetections int) ([]ClusteredCall, map[string]int) {var allCalls []ClusteredCallspeciesCount := make(map[string]int)for key, startTimes := range detections {sort.Float64s(startTimes)clusters := clusterStartTimes(startTimes, gapThreshold)for _, cluster := range clusters {if len(cluster) <= minDetections {continue}call := ClusteredCall{File: key.File,StartTime: cluster[0],EndTime: cluster[len(cluster)-1] + clipDuration,EbirdCode: key.EbirdCode,Segments: len(cluster),}allCalls = append(allCalls, call)speciesCount[key.EbirdCode]++ - edit in tools/calls/calls_from_common.go at line 450
sort.Slice(allCalls, func(i, j int) bool {if allCalls[i].File != allCalls[j].File {return allCalls[i].File < allCalls[j].File}return allCalls[i].StartTime < allCalls[j].StartTime})return allCalls, speciesCount - edit in me.txt at line 18[6.726837][6.726837]
- Rename `tools/import/` → `tools/ingest/` and `package imp` → `package ingest`. High churn, low semantic gain. Consider only if the name starts causing confusion in daily work.