RFSUR7ZEXTQNHH3IFJAL2NNOTGRPWOWB3PFIVH7VLI2JPTIBMW5AC CLUSTER_GAP_MULTIPLIER = 3 // Gap threshold = CLUSTER_GAP_MULTIPLIER * clip_durationMIN_DETECTIONS_PER_CLUSTER = 1 // Minimum detections per cluster (1 = filter single detections)
CLUSTER_GAP_MULTIPLIER = 3 // Gap threshold = CLUSTER_GAP_MULTIPLIER * clip_durationMIN_DETECTIONS_PER_CLUSTER = 1 // Minimum detections per cluster (1 = filter single detections)DEFAULT_CERTAINTY = 70
CSVPath string `json:"csv_path" jsonschema:"required,Path to predictions CSV file"`
CSVPath string `json:"csv_path" jsonschema:"required,Path to predictions CSV file"`Filter string `json:"filter" jsonschema:"Filter name for .data files"`WriteDotData bool `json:"write_dot_data" jsonschema:"Write .data files alongside audio files"`
Calls []ClusteredCall `json:"calls"`TotalCalls int `json:"total_calls"`ClipDuration float64 `json:"clip_duration"`GapThreshold float64 `json:"gap_threshold"`SpeciesCount map[string]int `json:"species_count"`Error *string `json:"error,omitempty"`
Calls []ClusteredCall `json:"calls"`TotalCalls int `json:"total_calls"`ClipDuration float64 `json:"clip_duration"`GapThreshold float64 `json:"gap_threshold"`SpeciesCount map[string]int `json:"species_count"`DataFilesWritten int `json:"data_files_written"`DataFilesSkipped int `json:"data_files_skipped"`Filter string `json:"filter"`Error *string `json:"error,omitempty"`}// AviaNZ .data file types// AviaNZMeta is the metadata element in a .data filetype AviaNZMeta struct {Operator string `json:"Operator"`Reviewer *string `json:"Reviewer,omitempty"`Duration float64 `json:"Duration"`}// AviaNZLabel represents a species label in a segmenttype AviaNZLabel struct {Species string `json:"species"`Certainty int `json:"certainty"`Filter string `json:"filter"`
// Write .data files if requestedif input.WriteDotData {dataFilesWritten, dataFilesSkipped, err := writeDotFiles(input.CSVPath, input.Filter, allCalls)if err != nil {// Log error but don't fail - still return the callserrMsg := fmt.Sprintf("Error writing .data files: %v", err)output.Error = &errMsg}output.DataFilesWritten = dataFilesWrittenoutput.DataFilesSkipped = dataFilesSkipped}
}// extractFilename extracts just the filename from a path// "./C05/2025-11-08/20250518_210000.WAV" -> "20250518_210000.WAV"func extractFilename(path string) string {return filepath.Base(path)}// writeDotFiles writes AviaNZ .data files for each audio file with callsfunc writeDotFiles(csvPath, filter string, calls []ClusteredCall) (int, int, error) {// Base directory is the directory containing the CSV filecsvDir := filepath.Dir(csvPath)// Group calls by file (using extracted filename)callsByFile := make(map[string][]ClusteredCall)for _, call := range calls {filename := extractFilename(call.File)callsByFile[filename] = append(callsByFile[filename], call)}dataFilesWritten := 0dataFilesSkipped := 0for filename, fileCalls := range callsByFile {// Resolve WAV path relative to CSV directorywavPath := filepath.Join(csvDir, filename)dataPath := wavPath + ".data"// Read WAV metadata for duration and sample ratemetadata, err := utils.ParseWAVHeader(wavPath)if err != nil {// Skip files we can't read (might not exist at this location)dataFilesSkipped++continue}// Build AviaNZ .data structuredataFile := buildAviaNZDataFile(fileCalls, filter, metadata.Duration, metadata.SampleRate)// Write .data fileif err := writeAviaNZDataFile(dataPath, dataFile); err != nil {return dataFilesWritten, dataFilesSkipped, fmt.Errorf("failed to write %s: %w", dataPath, err)}dataFilesWritten++}return dataFilesWritten, dataFilesSkipped, nil}// buildAviaNZDataFile creates an AviaNZ .data structure from callsfunc buildAviaNZDataFile(calls []ClusteredCall, filter string, duration float64, sampleRate int) []interface{} {// Create metadatareviewer := "None"meta := AviaNZMeta{Operator: "Auto",Reviewer: &reviewer,Duration: duration,}// Build segments arrayvar segments []AviaNZSegmentfor _, call := range calls {// Create labels for this segmentlabels := []AviaNZLabel{{Species: call.EbirdCode,Certainty: DEFAULT_CERTAINTY,Filter: filter,},}// Create segment: [start, end, freq_low, freq_high, labels]// 
freq_low=0, freq_high=sampleRate for full-band segmentssegment := AviaNZSegment{call.StartTime,call.EndTime,0, // freq_lowsampleRate, // freq_high (full band)labels,}segments = append(segments, segment)}// Build final structure: [meta, segment, segment, ...]result := make([]interface{}, 0, 1+len(segments))result = append(result, meta)for _, seg := range segments {result = append(result, seg)}return result
// writeAviaNZDataFile writes data to path as one line of compact JSON
// terminated by a newline. The file is created if it does not exist and
// truncated if it does.
//
// An error is returned when the file cannot be created, when data cannot be
// encoded as JSON, or when closing the file fails. The close error is checked
// explicitly (rather than discarded by a deferred Close) because a failure to
// persist written data may only be reported at close time.
func writeAviaNZDataFile(path string, data []interface{}) error {
	file, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}

	encoder := json.NewEncoder(file)
	encoder.SetIndent("", "") // No indentation for compact output
	if err := encoder.Encode(data); err != nil {
		// The encode failure is the error worth reporting; the close result
		// adds nothing for a file that is already known to be incomplete.
		_ = file.Close()
		return fmt.Errorf("failed to encode JSON: %w", err)
	}

	if err := file.Close(); err != nil {
		return fmt.Errorf("failed to close file: %w", err)
	}
	return nil
}

// ParseFilterFromFilename extracts the filter name from a predictions CSV
// filename of the form prefix_filter_date.csv.
//
//	"predsST_opensoundscape-kiwi-1.2_2025-11-12.csv" -> "opensoundscape-kiwi-1.2"
//
// Only the final path element of csvPath is examined. After a trailing ".csv"
// is removed (the match is case-sensitive), the name must split into exactly
// three underscore-separated parts; the middle part is returned. Any other
// shape yields the empty string.
func ParseFilterFromFilename(csvPath string) string {
	filename := filepath.Base(csvPath)

	// Remove .csv extension
	name := strings.TrimSuffix(filename, ".csv")

	// Split on underscore: prefix, filter, date
	parts := strings.Split(name, "_")
	if len(parts) == 3 {
		return parts[1]
	}
	return ""
}
fmt.Fprintf(os.Stderr, " skraak calls from-preds --csv predictions.csv > calls.json\n")fmt.Fprintf(os.Stderr, " skraak calls from-preds --csv preds.csv | jq '.calls[] | select(.ebird_code==\"kea1\")'\n")
fmt.Fprintf(os.Stderr, " skraak calls from-preds --csv predictions.csv\n")fmt.Fprintf(os.Stderr, " skraak calls from-preds --csv preds.csv --dot-data=false > calls.json\n")fmt.Fprintf(os.Stderr, " skraak calls from-preds --csv preds.csv --filter my-filter\n")
fmt.Fprintf(os.Stderr, " JSON with clustered calls (file, start_time, end_time, ebird_code, detections)\n")
fmt.Fprintf(os.Stderr, " With --dot-data=true (default): Writes .data files alongside audio files, outputs JSON summary\n")fmt.Fprintf(os.Stderr, " With --dot-data=false: Outputs JSON with clustered calls only (no .data files)\n")fmt.Fprintf(os.Stderr, "\nFilter name:\n")fmt.Fprintf(os.Stderr, " If --filter is provided, uses that value.\n")fmt.Fprintf(os.Stderr, " Otherwise, parses from CSV filename: prefix_filter_date.csv -> filter\n")fmt.Fprintf(os.Stderr, " Example: predsST_opensoundscape-kiwi-1.2_2025-11-12.csv -> opensoundscape-kiwi-1.2\n")
fmt.Fprintf(os.Stderr, " skraak calls from-preds --csv predictions.csv > calls.json\n")fmt.Fprintf(os.Stderr, " skraak calls from-preds --csv preds.csv | jq '.calls[] | select(.ebird_code==\"kea1\")'\n")
fmt.Fprintf(os.Stderr, " # Write .data files (default)\n")fmt.Fprintf(os.Stderr, " skraak calls from-preds --csv predictions.csv\n")fmt.Fprintf(os.Stderr, "\n")fmt.Fprintf(os.Stderr, " # JSON output only (no .data files)\n")fmt.Fprintf(os.Stderr, " skraak calls from-preds --csv predictions.csv --dot-data=false > calls.json\n")fmt.Fprintf(os.Stderr, "\n")fmt.Fprintf(os.Stderr, " # Override filter name\n")fmt.Fprintf(os.Stderr, " skraak calls from-preds --csv preds.csv --filter my-custom-filter\n")
}// Determine filter namefilterName := *filterif filterName == "" {filterName = tools.ParseFilterFromFilename(*csvPath)if filterName == "" {fmt.Fprintf(os.Stderr, "Error: Could not parse filter from filename. Use --filter flag.\n")fmt.Fprintf(os.Stderr, "Expected format: prefix_filter_date.csv (e.g., predsST_opensoundscape-kiwi-1.2_2025-11-12.csv)\n")os.Exit(1)}
fmt.Fprintf(os.Stderr, "Extracting calls from predictions: %s\n", *csvPath)
if *dotData {fmt.Fprintf(os.Stderr, "Extracting calls from predictions: %s\n", *csvPath)fmt.Fprintf(os.Stderr, "Filter: %s\n", filterName)fmt.Fprintf(os.Stderr, "Writing .data files: enabled\n")} else {fmt.Fprintf(os.Stderr, "Extracting calls from predictions: %s\n", *csvPath)fmt.Fprintf(os.Stderr, "Filter: %s\n", filterName)fmt.Fprintf(os.Stderr, "Writing .data files: disabled (--dot-data=false)\n")}