calls_clip.go
package calls
import (
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"skraak/audio"
"skraak/datafile"
"skraak/spectrogram"
"skraak/utils"
"skraak/wav"
)
// CallsClipInput defines the input for the clip tool.
//
// At least one of File or Folder must be set, and Output and Prefix are
// required (see validateClipInput). When File is set it takes precedence and
// Folder is not searched (see resolveClipFiles).
type CallsClipInput struct {
// File is the path to a single .data file to process.
File string `json:"file"`
// Folder is searched for .data files via datafile.FindDataFiles when File is empty.
Folder string `json:"folder"`
// Output is the destination folder; CallsClip creates it if it does not exist.
Output string `json:"output"`
// Prefix is required and is handed to spectrogram.ClipPaths when clip paths are built.
Prefix string `json:"prefix"`
// Filter is passed to Segment.SegmentMatchesFilters when selecting segments.
Filter string `json:"filter"`
// Species is split into a species name and call type by datafile.ParseSpeciesCallType.
Species string `json:"species"`
// Certainty is passed to Segment.SegmentMatchesFilters when selecting segments.
Certainty int `json:"certainty"`
// Size is the requested image size; it is clamped by spectrogram.ClampImageSize.
Size int `json:"size"`
// Color is forwarded to spectrogram.SpectrogramImageFromSamples.
Color bool `json:"color"`
// Night skips recordings that IsNight does not report as SolarNight.
Night bool `json:"night"`
// Day skips recordings that IsNight does not report as DiurnalActive.
Day bool `json:"day"`
// Location is optional; when non-empty it is parsed by utils.ParseLocation
// into latitude, longitude and timezone for the day/night check.
Location string `json:"location,omitempty"`
}
// CallsClipOutput defines the output for the clip tool.
//
// All fields are filled in incrementally: validation and setup failures append
// to Errors, and per-file results are merged in by accumulateFileResult.
type CallsClipOutput struct {
// FilesProcessed is incremented for each .data file that produced clips or
// reported no errors.
FilesProcessed int `json:"files_processed"`
// SegmentsClipped is the clipped-segment tally maintained by accumulateFileResult.
SegmentsClipped int `json:"segments_clipped"`
// NightSkipped accumulates time-filter skips when the Night flag is set.
NightSkipped int `json:"night_skipped,omitempty"`
// DaySkipped accumulates time-filter skips when the Night flag is not set.
DaySkipped int `json:"day_skipped,omitempty"`
// OutputFiles lists the paths of the generated PNG and WAV files.
OutputFiles []string `json:"output_files"`
// Errors collects human-readable error messages from setup and from each file.
Errors []string `json:"errors,omitempty"`
}
// CallsClip walks the selected .data files and writes an audio clip plus a
// spectrogram image for every segment that matches the requested filters.
//
// Setup failures (missing flags, no input files, output folder creation,
// unparsable location) are recorded in the returned output's Errors and also
// returned as the error. Per-file and per-segment failures are only recorded in
// Errors; in that case the returned error is nil.
func CallsClip(input CallsClipInput) (CallsClipOutput, error) {
	var output CallsClipOutput

	// Required flags first; nothing else is attempted without them.
	if err := validateClipInput(&output, input); err != nil {
		return output, err
	}

	// Split "species+calltype" into its two parts.
	speciesName, callType := datafile.ParseSpeciesCallType(input.Species)

	// Work out which .data files to process.
	filePaths, err := resolveClipFiles(&output, input)
	if err != nil {
		return output, err
	}

	// Make sure the destination folder exists.
	if mkErr := os.MkdirAll(input.Output, 0755); mkErr != nil {
		output.Errors = append(output.Errors, fmt.Sprintf("failed to create output folder: %v", mkErr))
		return output, mkErr
	}

	// Keep the image size inside the range the spectrogram package accepts.
	imgSize := spectrogram.ClampImageSize(input.Size)

	// Location is optional; without it lat/lng/timezone keep their zero values.
	var (
		lat, lng float64
		timezone string
	)
	if input.Location != "" {
		lat, lng, timezone, err = utils.ParseLocation(input.Location)
		if err != nil {
			output.Errors = append(output.Errors, err.Error())
			return output, err
		}
	}

	// Small batches run inline; larger ones fan out to worker goroutines.
	run := processFilesParallel
	if len(filePaths) <= 2 {
		run = processFilesSequential
	}
	run(&output, filePaths, input, speciesName, callType, imgSize, lat, lng, timezone)

	return output, nil
}
// validateClipInput checks that the flags clip generation cannot run without
// are present: an input (--file or --folder), --output and --prefix.
//
// Checks run in that order and stop at the first failure, which is appended to
// output.Errors and returned as an error. It returns nil when nothing is missing.
func validateClipInput(output *CallsClipOutput, input CallsClipInput) error {
	var (
		note string
		err  error
	)

	switch {
	case input.File == "" && input.Folder == "":
		note = "either --file or --folder is required"
		err = fmt.Errorf("missing required flag: --file or --folder")
	case input.Output == "":
		note = "--output is required"
		err = fmt.Errorf("missing required flag: --output")
	case input.Prefix == "":
		note = "--prefix is required"
		err = fmt.Errorf("missing required flag: --prefix")
	}

	if err != nil {
		output.Errors = append(output.Errors, note)
	}
	return err
}
// resolveClipFiles decides which .data files to process.
//
// An explicit input.File wins and is returned as a single-element list without
// touching the filesystem. Otherwise input.Folder is searched with
// datafile.FindDataFiles; a search failure or an empty result is recorded in
// output.Errors and returned as an error.
func resolveClipFiles(output *CallsClipOutput, input CallsClipInput) ([]string, error) {
	if single := input.File; single != "" {
		return []string{single}, nil
	}

	found, err := datafile.FindDataFiles(input.Folder)
	switch {
	case err != nil:
		output.Errors = append(output.Errors, fmt.Sprintf("failed to find .data files: %v", err))
		return nil, err
	case len(found) == 0:
		output.Errors = append(output.Errors, "no .data files found")
		return nil, fmt.Errorf("no .data files found")
	}
	return found, nil
}
// processFilesSequential handles each .data file in turn on the calling
// goroutine, folding every file's result into output as soon as it is ready.
func processFilesSequential(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
	for i := 0; i < len(filePaths); i++ {
		fileClips, timeSkipped, fileErrs := processFile(
			filePaths[i],
			input.Output, input.Prefix, input.Filter,
			speciesName, callType,
			input.Certainty, imgSize,
			input.Color, input.Night, input.Day,
			lat, lng, timezone,
		)
		accumulateFileResult(output, fileClips, timeSkipped, fileErrs, input.Night)
	}
}
// processFilesParallel processes .data files using a bounded pool of worker
// goroutines (at most 8, and never more than the CPU count or the file count).
//
// Results are merged into output in the order of filePaths, not in the order
// workers happen to finish, so OutputFiles and Errors come out in the same
// order as the sequential path and are stable from run to run.
func processFilesParallel(output *CallsClipOutput, filePaths []string, input CallsClipInput, speciesName, callType string, imgSize int, lat, lng float64, timezone string) {
	type fileResult struct {
		clips   []string
		skipped int
		errs    []string
	}

	// One slot per input file. Each slot is written by exactly one worker, so
	// no locking is needed; wg.Wait orders those writes before the merge below.
	results := make([]fileResult, len(filePaths))

	// Queue indices rather than paths so a worker knows which slot to fill.
	jobs := make(chan int, len(filePaths))
	for i := range filePaths {
		jobs <- i
	}
	close(jobs)

	workers := min(runtime.NumCPU(), 8, len(filePaths))
	var wg sync.WaitGroup
	for range workers {
		wg.Go(func() {
			for i := range jobs {
				clips, skipped, errs := processFile(filePaths[i], input.Output, input.Prefix, input.Filter, speciesName, callType, input.Certainty, imgSize, input.Color, input.Night, input.Day, lat, lng, timezone)
				results[i] = fileResult{clips: clips, skipped: skipped, errs: errs}
			}
		})
	}
	wg.Wait()

	// Merge on the calling goroutine, in input order.
	for _, r := range results {
		accumulateFileResult(output, r.clips, r.skipped, r.errs, input.Night)
	}
}
// accumulateFileResult merges a single file's results into the output.
//
// clips holds the output file paths produced for the file, skipped is the
// number of time-filter skips, errs the error messages, and night says whether
// the night filter was requested (which decides the skip counter that is bumped).
//
// A file counts as processed when it produced at least one clip or reported no
// errors; a file that only produced errors is not counted.
func accumulateFileResult(output *CallsClipOutput, clips []string, skipped int, errs []string, night bool) {
	// generateClip returns two paths per clipped segment (one PNG, one WAV), so
	// the number of segments is the number of output files divided by two.
	// Counting len(clips) directly would report twice the segments clipped.
	const filesPerSegment = 2
	output.SegmentsClipped += len(clips) / filesPerSegment

	if night {
		output.NightSkipped += skipped
	} else {
		output.DaySkipped += skipped
	}

	output.OutputFiles = append(output.OutputFiles, clips...)
	output.Errors = append(output.Errors, errs...)

	if len(clips) > 0 || len(errs) == 0 {
		output.FilesProcessed++
	}
}
// processFile handles one .data file end to end and returns, in order: the
// paths of the clip files written, the time-filter skip count (1 when the
// day/night filter rejected the recording, otherwise 0), and any error messages.
//
// The matching WAV is taken to be the .data path with its ".data" suffix
// removed. Work is ordered cheapest-first: segments are filtered before the
// day/night check, and the audio is only read once both have passed. A failure
// of the day/night check itself ends processing of the file without adding to
// the returned error list.
func processFile(dataPath, outputDir, prefix, filter, speciesName, callType string, certainty, imgSize int, color, night, day bool, lat, lng float64, timezone string) ([]string, int, []string) {
	parsed, err := datafile.ParseDataFile(dataPath)
	if err != nil {
		return nil, 0, []string{fmt.Sprintf("%s: failed to parse: %v", dataPath, err)}
	}

	// Derive the WAV path and its bare name (no directory, no extension).
	wavPath := filepath.Clean(strings.TrimSuffix(dataPath, ".data"))
	wavName := filepath.Base(wavPath)
	stem := strings.TrimSuffix(wavName, filepath.Ext(wavName))

	wanted := filterSegments(parsed.Segments, filter, speciesName, callType, certainty)
	if len(wanted) == 0 {
		return nil, 0, nil
	}

	// Day/night filter: decided before the full audio is loaded.
	if night || day {
		switch skipped, filterErr := checkDayNightFilter(wavPath, night, day, lat, lng, timezone); {
		case skipped:
			return nil, 1, nil
		case filterErr != nil:
			return nil, 0, nil
		}
	}

	// Load the audio once and share it across all segments of this file.
	samples, sampleRate, err := wav.ReadWAVSamples(wavPath)
	if err != nil {
		return nil, 0, []string{fmt.Sprintf("%s: failed to read WAV: %v", dataPath, err)}
	}

	clipPaths, segErrs := processSegments(wanted, dataPath, samples, sampleRate, outputDir, prefix, stem, imgSize, color)
	return clipPaths, 0, segErrs
}
// filterSegments keeps the segments for which SegmentMatchesFilters reports a
// match against the given filter, species name, call type and certainty.
// Input order is preserved; the result is nil when nothing matches.
func filterSegments(segments []*datafile.Segment, filter, speciesName, callType string, certainty int) []*datafile.Segment {
	var kept []*datafile.Segment
	for i := range segments {
		candidate := segments[i]
		if !candidate.SegmentMatchesFilters(filter, speciesName, callType, certainty) {
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}
// checkDayNightFilter applies day/night filtering. Returns (skipped=true, nil) if the
// recording should be skipped, (false, nil) if it passes, or (false, err) on failure.
func checkDayNightFilter(wavPath string, night, day bool, lat, lng float64, timezone string) (bool, error) {
result, err := IsNight(IsNightInput{
FilePath: wavPath,
Lat: lat,
Lng: lng,
Timezone: timezone,
})
if err != nil {
fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
return false, err
}
if night && !result.SolarNight {
fmt.Fprintf(os.Stderr, "skipped (daytime): %s\n", wavPath)
return true, nil
}
if day && !result.DiurnalActive {
fmt.Fprintf(os.Stderr, "skipped (nighttime): %s\n", wavPath)
return true, nil
}
return false, nil
}
// processSegments writes clips for every segment in segments and returns the
// paths written together with one error message per segment that failed.
//
// Up to two segments are handled inline; anything larger is handed to
// processSegmentsParallel.
func processSegments(segments []*datafile.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
	if len(segments) > 2 {
		return processSegmentsParallel(segments, dataPath, samples, sampleRate, outputDir, prefix, basename, imgSize, color)
	}

	var (
		written  []string
		failures []string
	)
	for _, seg := range segments {
		paths, err := generateClip(samples, sampleRate, outputDir, prefix, basename, seg.StartTime, seg.EndTime, imgSize, color)
		if err == nil {
			written = append(written, paths...)
			continue
		}
		failures = append(failures, fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, seg.StartTime, seg.EndTime, err))
	}
	return written, failures
}
// processSegmentsParallel generates clips for segments using worker goroutines
// (one per CPU, capped at the number of segments).
//
// It returns the paths written and one error message per failed segment. Both
// lists follow the order of segments rather than the order in which workers
// finish, so the result matches the sequential path in processSegments and is
// stable from run to run.
func processSegmentsParallel(segments []*datafile.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
	type segResult struct {
		clips []string
		err   error
	}

	// One slot per segment. Each slot is written by exactly one worker, so no
	// locking is needed; wg.Wait orders those writes before the merge below.
	results := make([]segResult, len(segments))

	// Queue indices rather than segments so a worker knows which slot to fill.
	jobs := make(chan int, len(segments))
	for i := range segments {
		jobs <- i
	}
	close(jobs)

	workers := min(runtime.NumCPU(), len(segments))
	var wg sync.WaitGroup
	for range workers {
		wg.Go(func() {
			for i := range jobs {
				seg := segments[i]
				clipFiles, err := generateClip(samples, sampleRate, outputDir, prefix, basename, seg.StartTime, seg.EndTime, imgSize, color)
				results[i] = segResult{clips: clipFiles, err: err}
			}
		})
	}
	wg.Wait()

	// Merge on the calling goroutine, in segment order.
	var clips []string
	var errors []string
	for i, r := range results {
		if r.err != nil {
			seg := segments[i]
			errors = append(errors, fmt.Sprintf("%s: segment %.0f-%.0f: %v", dataPath, seg.StartTime, seg.EndTime, r.err))
			continue
		}
		clips = append(clips, r.clips...)
	}
	return clips, errors
}
// generateClip writes a spectrogram PNG and a WAV clip for the segment
// [startTime, endTime] cut out of samples.
//
// On success it returns exactly two paths: the PNG path followed by the WAV
// path. It returns an error when the clip paths cannot be built, the segment
// contains no samples, the spectrogram cannot be rendered, or either file
// cannot be written. If the WAV write fails after the PNG was written, the PNG
// is removed again so that no half-finished clip pair is left in outputDir.
func generateClip(samples []float64, sampleRate int, outputDir, prefix, basename string, startTime, endTime float64, imgSize int, color bool) ([]string, error) {
	// Build paths (ClipPaths checks for existing files)
	pngPath, wavPath, err := spectrogram.ClipPaths(outputDir, prefix, basename, startTime, endTime)
	if err != nil {
		return nil, err
	}

	// Extract segment samples
	segSamples := spectrogram.ExtractSegmentSamples(samples, sampleRate, startTime, endTime)
	if len(segSamples) == 0 {
		return nil, fmt.Errorf("no samples in segment")
	}

	// Downsample when the source rate exceeds audio.DefaultMaxSampleRate; the
	// image and the WAV clip are both produced from the resampled audio.
	if sampleRate > audio.DefaultMaxSampleRate {
		segSamples = audio.ResampleRate(segSamples, sampleRate, audio.DefaultMaxSampleRate)
		sampleRate = audio.DefaultMaxSampleRate
	}

	// Generate spectrogram image
	img := spectrogram.SpectrogramImageFromSamples(segSamples, sampleRate, color, imgSize)
	if img == nil {
		return nil, fmt.Errorf("failed to generate spectrogram")
	}

	// Write PNG
	if err := spectrogram.WritePNGFile(pngPath, img); err != nil {
		return nil, err
	}

	// Write WAV
	if err := wav.WriteWAVFile(wavPath, segSamples, sampleRate); err != nil {
		// Best-effort cleanup of the PNG written just above. Per the note on
		// ClipPaths, existing files are checked for, so a leftover PNG could
		// presumably get in the way of a retry. A removal failure is ignored
		// on purpose: the WAV error is the one worth reporting.
		_ = os.Remove(pngPath)
		return nil, fmt.Errorf("failed to write WAV: %w", err)
	}

	return []string{pngPath, wavPath}, nil
}