}
}
}
// ClusteredCall represents a clustered bird call detection.
// Shared by all call source implementations (preds, birda, raven).
//
// One value describes a run of detections for a single file and species that
// were merged into one call. The struct tags fix the JSON key names.
type ClusteredCall struct {
// File is the file the call belongs to; clusterDetections copies it from the grouping key.
File string `json:"file"`
// StartTime is the start time of the first detection in the cluster.
// NOTE(review): presumably seconds from the start of the recording - confirm.
StartTime float64 `json:"start_time"`
// EndTime is the end of the call; clusterDetections computes it as the start
// time of the last detection in the cluster plus the clip duration.
EndTime float64 `json:"end_time"`
// EbirdCode is the species code the detections were grouped under.
EbirdCode string `json:"ebird_code"`
// Segments is the number of detections merged into this call.
Segments int `json:"segments"`
}
// DirCache caches directory entries for fast WAV file lookup.
// Scans the directory once and builds a map from lowercased basename to full filename.
// Safe for concurrent read-only use after construction.
//
// Keys in both maps are the filename with its final extension removed and then
// lowercased; values are the filename exactly as it appears in the directory.
// When several files reduce to the same key, the entry NewDirCache processed
// last replaces the earlier ones.
type DirCache struct {
// dir is the directory given to NewDirCache; lookups join it with the cached filename.
dir string
wavMap map[string]string // lowercase basename -> filename with original case (e.g. "20230610_150000" -> "20230610_150000.WAV")
dirMap map[string]string // lowercase basename -> filename for any file (used by from-raven for .selections.txt etc.)
}
// NewDirCache builds a DirCache for dir from a single directory listing.
//
// Subdirectories are skipped. Every other entry is indexed in dirMap under its
// name with the final extension removed and lowercased; entries whose
// extension is ".wav" (in any letter case) are indexed in wavMap as well.
// A later entry with the same key overwrites an earlier one.
//
// If the directory cannot be read the error is discarded and an empty cache
// with non-nil maps is returned, so every lookup simply reports "not found".
func NewDirCache(dir string) *DirCache {
	entries, err := os.ReadDir(dir)
	if err != nil {
		// Best effort: an unreadable directory behaves like an empty one.
		return &DirCache{
			dir:    dir,
			wavMap: make(map[string]string),
			dirMap: make(map[string]string),
		}
	}

	cache := &DirCache{
		dir:    dir,
		wavMap: make(map[string]string, len(entries)),
		dirMap: make(map[string]string, len(entries)),
	}
	for _, e := range entries {
		if e.IsDir() {
			continue
		}
		filename := e.Name()
		suffix := filepath.Ext(filename)
		// Both maps share the same key: the lowercased name without its extension.
		key := strings.ToLower(strings.TrimSuffix(filename, suffix))
		cache.dirMap[key] = filename
		if strings.EqualFold(suffix, ".wav") {
			cache.wavMap[key] = filename
		}
	}
	return cache
}
// FindWAV resolves a WAV file from its basename, ignoring letter case.
//
// baseName is the filename without its extension. The result is the cache
// directory joined with the filename as stored (original case preserved), or
// the empty string when no WAV file is cached under that basename.
func (dc *DirCache) FindWAV(baseName string) string {
	stored, found := dc.wavMap[strings.ToLower(baseName)]
	if !found {
		return ""
	}
	return filepath.Join(dc.dir, stored)
}
// FindFile resolves a file of any type from its basename, ignoring letter case.
//
// baseName is the filename with only its final extension removed. The result
// is the cache directory joined with the filename as stored (original case
// preserved), or the empty string when nothing is cached under that basename.
func (dc *DirCache) FindFile(baseName string) string {
	key := strings.ToLower(baseName)
	filename, known := dc.dirMap[key]
	if known {
		return filepath.Join(dc.dir, filename)
	}
	return ""
}
// findWAVFile finds a WAV file in the directory with case-insensitive matching.
//
// baseName is the filename without extension (e.g., "20230610_150000"). Both
// the ".wav" extension and the basename are compared ignoring letter case, in
// line with DirCache.FindWAV. A file whose basename matches baseName exactly
// is preferred; otherwise the first file matching case-insensitively is used.
// Subdirectories are never returned.
//
// Returns dir joined with the filename in its on-disk case, or the empty
// string if nothing matches or the directory cannot be read.
//
// Deprecated: Use DirCache.FindWAV for batch operations to avoid repeated directory scans.
func findWAVFile(dir, baseName string) string {
	entries, err := os.ReadDir(dir)
	if err != nil {
		// Best effort: an unreadable directory is reported as "not found".
		return ""
	}

	fallback := "" // first case-insensitive match, used only if no exact match exists
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		ext := filepath.Ext(name)
		if !strings.EqualFold(ext, ".wav") {
			continue
		}
		stem := strings.TrimSuffix(name, ext)
		if stem == baseName {
			// Exact basename match wins immediately.
			return filepath.Join(dir, name)
		}
		if fallback == "" && strings.EqualFold(stem, baseName) {
			fallback = name
		}
	}

	if fallback == "" {
		return ""
	}
	return filepath.Join(dir, fallback)
}
// ParseFilterFromFilename returns the filter name embedded in a preds CSV filename.
//
// Only the last path element of csvPath is considered. A trailing ".csv"
// (lower case) is stripped and the remainder must consist of exactly three
// underscore-separated fields; the middle field is the filter name:
//
//	"predsST_opensoundscape-kiwi-1.2_2025-11-12.csv" -> "opensoundscape-kiwi-1.2"
//
// Any other shape yields the empty string.
func ParseFilterFromFilename(csvPath string) string {
	stem := strings.TrimSuffix(filepath.Base(csvPath), ".csv")
	fields := strings.Split(stem, "_")
	if len(fields) != 3 {
		return ""
	}
	return fields[1]
}
// clusterStartTimes splits startTimes into runs of neighbouring values.
//
// The slice is walked in the order given (it is not sorted here). A value
// stays in the current run when it is at most gapThreshold after the value
// immediately before it; otherwise it opens a new run. Each run is a freshly
// allocated slice, so the result does not alias startTimes.
//
// Returns nil for empty input.
func clusterStartTimes(startTimes []float64, gapThreshold float64) [][]float64 {
	if len(startTimes) == 0 {
		return nil
	}

	var groups [][]float64
	prev := startTimes[0]
	run := []float64{prev}
	for _, t := range startTimes[1:] {
		gap := t - prev
		prev = t
		if gap <= gapThreshold {
			run = append(run, t)
			continue
		}
		// Gap too large: close the current run and start a new one at t.
		groups = append(groups, run)
		run = []float64{t}
	}
	// The run still being built is always the final cluster.
	return append(groups, run)
}
// predFileSpeciesKey groups detections by file and ebird code.
// It is the key type of the detections map passed to clusterDetections, so
// all start times recorded for one (file, species) pair are clustered together.
type predFileSpeciesKey struct {
// File is copied into ClusteredCall.File for every call built from this group.
File string
// EbirdCode is copied into ClusteredCall.EbirdCode and used as the per-species count key.
EbirdCode string
}
// clusterDetections groups detections into clusters and produces sorted ClusteredCalls.
func clusterDetections(detections map[predFileSpeciesKey][]float64, clipDuration, gapThreshold float64, minDetections int) ([]ClusteredCall, map[string]int) {
var allCalls []ClusteredCall
speciesCount := make(map[string]int)
for key, startTimes := range detections {
sort.Float64s(startTimes)
clusters := clusterStartTimes(startTimes, gapThreshold)
for _, cluster := range clusters {
if len(cluster) <= minDetections {
continue
}
call := ClusteredCall{
File: key.File,
StartTime: cluster[0],
EndTime: cluster[len(cluster)-1] + clipDuration,
EbirdCode: key.EbirdCode,
Segments: len(cluster),
}
allCalls = append(allCalls, call)
speciesCount[key.EbirdCode]++