package tools
import (
"encoding/csv"
"fmt"
"io"
"os"
"sort"
"strconv"
)
// Constants for clustering algorithm
const (
	// CLUSTER_GAP_MULTIPLIER scales the clip duration to obtain the maximum
	// allowed gap between consecutive detection start times within one call:
	// gap threshold = CLUSTER_GAP_MULTIPLIER * clip_duration.
	// NOTE(review): Go convention is MixedCaps (ClusterGapMultiplier), but the
	// names are exported and may be referenced by external callers, so they
	// are left unchanged.
	CLUSTER_GAP_MULTIPLIER = 3 // Gap threshold = CLUSTER_GAP_MULTIPLIER * clip_duration
	// MIN_DETECTIONS_PER_CLUSTER is an inclusive lower cutoff: clusters with
	// this many detections or fewer are discarded (the comparison is <=, so
	// the default of 1 drops single-detection clusters).
	MIN_DETECTIONS_PER_CLUSTER = 1 // Minimum detections per cluster (1 = filter single detections)
)
// ClusteredCall represents a clustered bird call detection: a run of
// consecutive positive detections for one species in one file, merged into a
// single call spanning [StartTime, EndTime].
type ClusteredCall struct {
	File       string  `json:"file"`       // source audio file the detections came from
	StartTime  float64 `json:"start_time"` // start time of the first detection in the cluster (same units as the CSV, presumably seconds — TODO confirm)
	EndTime    float64 `json:"end_time"`   // start time of the last detection plus one clip duration
	EbirdCode  string  `json:"ebird_code"` // species code, taken from the CSV column header
	Detections int     `json:"detections"` // number of individual detections merged into this call
}
// CallsFromPredsInput defines the input for the calls-from-preds tool.
type CallsFromPredsInput struct {
	// CSVPath is the path to the predictions CSV file. The file must contain
	// 'file', 'start_time' and 'end_time' columns; every other column is
	// treated as an ebird species code.
	CSVPath string `json:"csv_path" jsonschema:"required,Path to predictions CSV file"`
}
// CallsFromPredsOutput defines the output for the calls-from-preds tool.
type CallsFromPredsOutput struct {
	Calls         []ClusteredCall `json:"calls"`          // clustered calls, sorted by file then start time
	TotalCalls    int             `json:"total_calls"`    // number of calls after filtering small clusters
	TotalClusters int             `json:"total_clusters"` // currently always equal to TotalCalls (both are set to len(Calls))
	ClipDuration  float64         `json:"clip_duration"`  // end_time - start_time of the first data row
	GapThreshold  float64         `json:"gap_threshold"`  // CLUSTER_GAP_MULTIPLIER * ClipDuration
	SpeciesCount  map[string]int  `json:"species_count"`  // ebird code -> number of calls for that species
	FilesCount    int             `json:"files_count"`    // number of distinct files seen in the CSV
	Error         *string         `json:"error,omitempty"` // human-readable failure message, nil on success
}
// CallsFromPreds reads a predictions CSV and clusters detections into continuous bird calls.
//
// The CSV must have 'file', 'start_time' and 'end_time' columns; every other
// column is treated as an ebird species code whose cells contain "1" for a
// positive detection. Detections of the same species in the same file are
// merged into one call when the gap between consecutive start times is at most
// CLUSTER_GAP_MULTIPLIER times the clip duration (derived from the first data
// row). Clusters with MIN_DETECTIONS_PER_CLUSTER or fewer detections are
// discarded. On failure the message is both stored in output.Error and
// returned as the error.
func CallsFromPreds(input CallsFromPredsInput) (CallsFromPredsOutput, error) {
	var output CallsFromPredsOutput

	// fail records the message on the output and returns it as the error so
	// every exit path reports the problem the same way.
	fail := func(format string, args ...any) (CallsFromPredsOutput, error) {
		errMsg := fmt.Sprintf(format, args...)
		output.Error = &errMsg
		return output, fmt.Errorf("%s", errMsg)
	}

	// Open CSV file
	file, err := os.Open(input.CSVPath)
	if err != nil {
		return fail("Failed to open CSV file: %v", err)
	}
	defer file.Close()

	reader := csv.NewReader(file)
	reader.ReuseRecord = true // Memory optimization for large files

	header, err := reader.Read()
	if err != nil {
		return fail("Failed to read CSV header: %v", err)
	}

	// Locate the fixed columns; all remaining columns are ebird codes.
	fileIdx, startTimeIdx, endTimeIdx := -1, -1, -1
	var ebirdCodes []string
	var ebirdIdx []int
	for i, col := range header {
		switch col {
		case "file":
			fileIdx = i
		case "start_time":
			startTimeIdx = i
		case "end_time":
			endTimeIdx = i
		default:
			ebirdCodes = append(ebirdCodes, col)
			ebirdIdx = append(ebirdIdx, i)
		}
	}
	if fileIdx == -1 || startTimeIdx == -1 || endTimeIdx == -1 {
		return fail("CSV must have 'file', 'start_time', and 'end_time' columns")
	}
	if len(ebirdCodes) == 0 {
		return fail("CSV must have at least one ebird code column")
	}

	// Group detection start times by (file, ebird_code) for efficient clustering.
	type FileEbirdKey struct {
		File      string
		EbirdCode string
	}
	detections := make(map[FileEbirdKey][]float64)
	clipDuration := 0.0
	filesSeen := make(map[string]bool)

	// Single pass over all data rows. The first row additionally supplies the
	// clip duration (end_time - start_time), from which the gap threshold is
	// derived. Unlike the previous version, malformed time values are reported
	// instead of being silently parsed as 0.
	first := true
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fail("Failed to read CSV row: %v", err)
		}
		startTime, err := strconv.ParseFloat(record[startTimeIdx], 64)
		if err != nil {
			return fail("Invalid start_time %q: %v", record[startTimeIdx], err)
		}
		if first {
			first = false
			endTime, err := strconv.ParseFloat(record[endTimeIdx], 64)
			if err != nil {
				return fail("Invalid end_time %q: %v", record[endTimeIdx], err)
			}
			clipDuration = endTime - startTime
			output.ClipDuration = clipDuration
		}
		fileName := record[fileIdx]
		filesSeen[fileName] = true
		for i, idx := range ebirdIdx {
			if record[idx] == "1" {
				key := FileEbirdKey{File: fileName, EbirdCode: ebirdCodes[i]}
				detections[key] = append(detections[key], startTime)
			}
		}
	}

	gapThreshold := float64(CLUSTER_GAP_MULTIPLIER) * clipDuration
	output.GapThreshold = gapThreshold
	output.FilesCount = len(filesSeen)

	// Cluster detections per (file, ebird_code) and convert clusters to calls.
	var allCalls []ClusteredCall
	speciesCount := make(map[string]int)
	for key, startTimes := range detections {
		sort.Float64s(startTimes)
		for _, cluster := range clusterStartTimes(startTimes, gapThreshold) {
			// Drop clusters at or below the minimum size (single detections
			// by default).
			if len(cluster) <= MIN_DETECTIONS_PER_CLUSTER {
				continue
			}
			allCalls = append(allCalls, ClusteredCall{
				File:       key.File,
				StartTime:  cluster[0],
				EndTime:    cluster[len(cluster)-1] + clipDuration,
				EbirdCode:  key.EbirdCode,
				Detections: len(cluster),
			})
			speciesCount[key.EbirdCode]++
		}
	}

	// Map iteration order is random; sort for deterministic output
	// (by file, then start time).
	sort.Slice(allCalls, func(i, j int) bool {
		if allCalls[i].File != allCalls[j].File {
			return allCalls[i].File < allCalls[j].File
		}
		return allCalls[i].StartTime < allCalls[j].StartTime
	})

	output.Calls = allCalls
	output.TotalCalls = len(allCalls)
	output.TotalClusters = len(allCalls)
	output.SpeciesCount = speciesCount
	return output, nil
}
// clusterStartTimes partitions a sorted list of start times into groups of
// consecutive detections: a new group begins whenever two neighbouring times
// are more than gapThreshold apart (a gap exactly equal to the threshold
// stays in the same group). An empty input yields a nil result.
func clusterStartTimes(startTimes []float64, gapThreshold float64) [][]float64 {
	if len(startTimes) == 0 {
		return nil
	}
	var groups [][]float64
	group := []float64{startTimes[0]}
	for _, t := range startTimes[1:] {
		if t-group[len(group)-1] > gapThreshold {
			// Too far from the previous detection: close out this group
			// and open a fresh one.
			groups = append(groups, group)
			group = []float64{t}
		} else {
			group = append(group, t)
		}
	}
	// The open group always holds at least one element; append it last.
	return append(groups, group)
}