// calls_clip_labels_test.go
package calls
import (
"encoding/csv"
"os"
"path/filepath"
"strings"
"testing"
"skraak/datafile"
)
// --- test helpers (test file only) ---
// writeDataFile writes df to the file called name inside dir by calling
// df.Write, and aborts the calling test if that write returns an error.
func writeDataFile(t *testing.T, dir, name string, df *datafile.DataFile) {
	t.Helper()

	target := filepath.Join(dir, name)
	err := df.Write(target)
	if err == nil {
		return
	}
	t.Fatalf("write .data file %s: %v", name, err)
}
// writeMapping stores the supplied JSON text as "mapping.json" inside dir
// (file mode 0644) and aborts the calling test if the file cannot be written.
func writeMapping(t *testing.T, dir, json string) {
	t.Helper()

	target := filepath.Join(dir, "mapping.json")
	err := os.WriteFile(target, []byte(json), 0644)
	if err == nil {
		return
	}
	t.Fatalf("write mapping.json: %v", err)
}
// parseCSV opens the CSV file at path and splits it into its first record
// (returned as the header) and every remaining record (returned as the rows).
// Failing to open the file, read the header, or read the rows aborts the
// calling test.
func parseCSV(t *testing.T, path string) ([]string, [][]string) {
	t.Helper()

	file, openErr := os.Open(path)
	if openErr != nil {
		t.Fatalf("open CSV %s: %v", path, openErr)
	}
	defer file.Close()

	reader := csv.NewReader(file)

	header, headErr := reader.Read()
	if headErr != nil {
		t.Fatalf("read header: %v", headErr)
	}

	rows, rowsErr := reader.ReadAll()
	if rowsErr != nil {
		t.Fatalf("read rows: %v", rowsErr)
	}

	return header, rows
}
// clipLabels runs CallsClipLabels against dir using the shared test defaults:
// the mapping is read from dir/mapping.json, the CSV is written to
// dir/clip_labels.csv, ClipDuration is 5, ClipOverlap is 0, MinLabelOverlap is
// 0.25 and FinalClip is "full". Each function in extra is applied to the input,
// in order, before the call so a test can override individual fields. An error
// from CallsClipLabels aborts the calling test; otherwise its output is
// returned.
func clipLabels(t *testing.T, dir string, extra ...func(*CallsClipLabelsInput)) CallsClipLabelsOutput {
	t.Helper()

	var input CallsClipLabelsInput
	input.Folder = dir
	input.MappingPath = filepath.Join(dir, "mapping.json")
	input.OutputPath = filepath.Join(dir, "clip_labels.csv")
	input.ClipDuration = 5
	input.ClipOverlap = 0
	input.MinLabelOverlap = 0.25
	input.FinalClip = "full"

	for i := range extra {
		extra[i](&input)
	}

	result, err := CallsClipLabels(input)
	if err != nil {
		t.Fatalf("CallsClipLabels: %v", err)
	}
	return result
}
// --- tests ---
// TestClipLabels_RealClassTrue checks that clips overlapping a segment whose
// species maps to a real class are written with that class set to "True",
// that the remaining clips are written with "False", and that the per-class
// true count in the output matches.
func TestClipLabels_RealClassTrue(t *testing.T) {
	dir := t.TempDir()
	writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{
		Meta: &datafile.DataMeta{Duration: 20},
		Segments: []*datafile.Segment{
			{
				StartTime: 3, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
				Labels: []*datafile.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
	out := clipLabels(t, dir)
	header, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	// Header: file, start_time, end_time, Kiwi
	if len(header) != 4 || header[3] != "Kiwi" {
		t.Fatalf("header = %v, want [..., Kiwi]", header)
	}
	// Duration 20 with ClipDuration 5 and no overlap yields clips 0-5, 5-10,
	// 10-15 and 15-20. Assert the count explicitly: the per-row loop below
	// only inspects rows that exist, so it would pass vacuously on an empty
	// or truncated CSV.
	if len(rows) != 4 {
		t.Errorf("got %d rows, want 4", len(rows))
	}
	// Clip 0-5 overlaps segment 3-8 by 2s ≥ 0.25 → Kiwi=True
	// Clip 5-10 overlaps segment 3-8 by 3s ≥ 0.25 → Kiwi=True
	// Clip 10-15, 15-20 → Kiwi=False
	kiwiCol := 3
	for i, row := range rows {
		switch row[1] {
		case "0.0", "5.0":
			if row[kiwiCol] != "True" {
				t.Errorf("row %d (start=%s): Kiwi=%s, want True", i, row[1], row[kiwiCol])
			}
		case "10.0", "15.0":
			if row[kiwiCol] != "False" {
				t.Errorf("row %d (start=%s): Kiwi=%s, want False", i, row[1], row[kiwiCol])
			}
		}
	}
	if out.PerClassTrueCount["Kiwi"] != 2 {
		t.Errorf("PerClassTrueCount[Kiwi] = %d, want 2", out.PerClassTrueCount["Kiwi"])
	}
}
func TestClipLabels_GapClipsAllFalse(t *testing.T) {
dir := t.TempDir()
// 15s file, Kiwi segment 0-5 only → clips 5-10 and 10-15 are gaps
writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{
Meta: &datafile.DataMeta{Duration: 15},
Segments: []*datafile.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*datafile.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
out := clipLabels(t, dir)
if out.ClipsAllFalseGap != 2 {
t.Errorf("ClipsAllFalseGap = %d, want 2", out.ClipsAllFalseGap)
}
if out.PerClassTrueCount["Kiwi"] != 1 {
t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
}
if out.RowsWritten != 3 {
t.Errorf("RowsWritten = %d, want 3", out.RowsWritten)
}
}
// TestClipLabels_NegativeOverridesPositive checks that a clip overlapped by
// both a real-class segment and a segment mapped to __NEGATIVE__ is written
// with the real class set to "False", while a clip overlapped only by the
// real-class segment is still written as "True".
func TestClipLabels_NegativeOverridesPositive(t *testing.T) {
	dir := t.TempDir()
	// Kiwi segment 0-8, Not segment 0-4 → clip 0-5 overlaps both → __NEGATIVE__ wins
	// Clip 5-10 overlaps only Kiwi (3s) → True
	writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{
		Meta: &datafile.DataMeta{Duration: 10},
		Segments: []*datafile.Segment{
			{
				StartTime: 0, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
				Labels: []*datafile.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
			{
				StartTime: 0, EndTime: 4, FreqLow: 100, FreqHigh: 5000,
				Labels: []*datafile.Label{{Species: "Not", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
	out := clipLabels(t, dir)
	if out.ClipsNegative != 1 {
		t.Errorf("ClipsNegative = %d, want 1", out.ClipsNegative)
	}
	header, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	// Guard the indexing below so a short or malformed CSV produces a
	// readable failure instead of an index-out-of-range panic. parseCSV uses
	// a csv.Reader with default settings, which rejects data rows whose field
	// count differs from the header's, so checking the header width covers
	// every row.
	const kiwiCol = 3
	if len(header) <= kiwiCol {
		t.Fatalf("header = %v, want at least %d columns", header, kiwiCol+1)
	}
	// Duration 10 with ClipDuration 5 yields clips 0-5 and 5-10.
	if len(rows) != 2 {
		t.Fatalf("got %d rows, want 2", len(rows))
	}
	// Clip 0-5: negative hit → all-False (Not overlaps 0-4 by 4s)
	if rows[0][kiwiCol] != "False" {
		t.Errorf("clip 0-5 Kiwi = %s, want False (overridden by __NEGATIVE__)", rows[0][kiwiCol])
	}
	// Clip 5-10: only Kiwi overlaps (3s) → True
	if rows[1][kiwiCol] != "True" {
		t.Errorf("clip 5-10 Kiwi = %s, want True", rows[1][kiwiCol])
	}
}
func TestClipLabels_IgnoreExcludesClip(t *testing.T) {
dir := t.TempDir()
// Don't Know segment 0-5, Kiwi segment 6-10
// Clip 0-5 overlaps __IGNORE__ → excluded
// Clip 5-10 overlaps Kiwi → emitted with True
writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{
Meta: &datafile.DataMeta{Duration: 15},
Segments: []*datafile.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*datafile.Label{{Species: "Don't Know", Certainty: 0, Filter: "f1"}},
},
{
StartTime: 6, EndTime: 10, FreqLow: 100, FreqHigh: 5000,
Labels: []*datafile.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Don't Know":{"species":"__IGNORE__"}}`)
out := clipLabels(t, dir)
if out.ClipsIgnored != 1 {
t.Errorf("ClipsIgnored = %d, want 1", out.ClipsIgnored)
}
if out.SegmentsIgnored != 1 {
t.Errorf("SegmentsIgnored = %d, want 1", out.SegmentsIgnored)
}
// Only 2 rows: clip 5-10 (Kiwi=True) and clip 10-15 (gap)
if out.RowsWritten != 2 {
t.Errorf("RowsWritten = %d, want 2", out.RowsWritten)
}
}
func TestClipLabels_FilterRestrictsLabels(t *testing.T) {
dir := t.TempDir()
// Same time range, two filters. Only "wanted" should contribute.
writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{
Meta: &datafile.DataMeta{Duration: 10},
Segments: []*datafile.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*datafile.Label{
{Species: "Kiwi", Certainty: 100, Filter: "wanted"},
{Species: "Not", Certainty: 100, Filter: "unwanted"},
},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
out := clipLabels(t, dir, func(in *CallsClipLabelsInput) { in.Filter = "wanted" })
// Only Kiwi from "wanted" filter → clip 0-5 should be Kiwi=True
// Not from "unwanted" filter should be ignored → no __NEGATIVE__ override
if out.ClipsNegative != 0 {
t.Errorf("ClipsNegative = %d, want 0 (Not filter excluded)", out.ClipsNegative)
}
if out.PerClassTrueCount["Kiwi"] != 1 {
t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
}
}
func TestClipLabels_MappingCoverageError(t *testing.T) {
dir := t.TempDir()
writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{
Meta: &datafile.DataMeta{Duration: 10},
Segments: []*datafile.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*datafile.Label{{Species: "Mystery", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
input := CallsClipLabelsInput{
Folder: dir,
MappingPath: filepath.Join(dir, "mapping.json"),
OutputPath: filepath.Join(dir, "clip_labels.csv"),
ClipDuration: 5,
ClipOverlap: 0,
MinLabelOverlap: 0.25,
FinalClip: "full",
}
_, err := CallsClipLabels(input)
if err == nil {
t.Fatal("expected error for missing species in mapping")
}
if !strings.Contains(err.Error(), "Mystery") {
t.Errorf("error should mention missing species, got: %v", err)
}
}
// TestClipLabels_AppendMode checks that running CallsClipLabels a second time
// with the same folder and the same, already written, output CSV is rejected
// with an error whose message contains "duplicate".
func TestClipLabels_AppendMode(t *testing.T) {
	tmp := t.TempDir()
	writeMapping(t, tmp, `{"Kiwi":{"species":"Kiwi"}}`)

	// First file
	kiwi := &datafile.Segment{
		StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
		Labels: []*datafile.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
	}
	recording := &datafile.DataFile{
		Meta:     &datafile.DataMeta{Duration: 5},
		Segments: []*datafile.Segment{kiwi},
	}
	writeDataFile(t, tmp, "a.wav.data", recording)

	first := clipLabels(t, tmp)
	if got := first.RowsWritten; got != 1 {
		t.Fatalf("first run: RowsWritten = %d, want 1", got)
	}

	// Second run: same folder and same output file as the first run, so the
	// clips it produces are already present in the CSV and the call is
	// expected to fail with a duplicate error.
	var again CallsClipLabelsInput
	again.Folder = tmp
	again.MappingPath = filepath.Join(tmp, "mapping.json")
	again.OutputPath = filepath.Join(tmp, "clip_labels.csv")
	again.ClipDuration = 5
	again.ClipOverlap = 0
	again.MinLabelOverlap = 0.25
	again.FinalClip = "full"

	_, err := CallsClipLabels(again)
	if err == nil {
		t.Fatal("expected duplicate error on second run with same folder")
	}
	if msg := err.Error(); !strings.Contains(msg, "duplicate") {
		t.Errorf("error should mention duplicate, got: %v", err)
	}
}
// TestClipLabels_MultipleFiles checks that two .data files in the same folder
// are both parsed, that their clips are all written to the output, and that
// the first CSV column takes two distinct values across the written rows.
func TestClipLabels_MultipleFiles(t *testing.T) {
	tmp := t.TempDir()
	writeMapping(t, tmp, `{"Kiwi":{"species":"Kiwi"}}`)

	// Both recordings carry the same Kiwi segment; only their durations differ.
	newRecording := func(duration float64) *datafile.DataFile {
		return &datafile.DataFile{
			Meta: &datafile.DataMeta{Duration: duration},
			Segments: []*datafile.Segment{
				{
					StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
					Labels: []*datafile.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
				},
			},
		}
	}
	writeDataFile(t, tmp, "a.wav.data", newRecording(10))
	writeDataFile(t, tmp, "b.wav.data", newRecording(5))

	result := clipLabels(t, tmp)

	if got := result.DataFilesParsed; got != 2 {
		t.Errorf("DataFilesParsed = %d, want 2", got)
	}
	// a: 2 clips (0-5, 5-10), b: 1 clip (0-5) = 3 total
	if got := result.RowsWritten; got != 3 {
		t.Errorf("RowsWritten = %d, want 3", got)
	}

	_, rows := parseCSV(t, filepath.Join(tmp, "clip_labels.csv"))
	perFile := make(map[string]int)
	for i := range rows {
		perFile[rows[i][0]]++
	}
	if n := len(perFile); n != 2 {
		t.Errorf("expected 2 distinct files in CSV, got %d", n)
	}
}