XFBDXVMIJ3HPLDLOZ4YCVOZI2QTURMVIKD3QJQX4I4K4MT6IH5AQC SWLGSMXGYI3G4YG35W47V5QG33CDWZQMJYCJVTQKT4XXEPF3OUFAC IFVRAERTCCDICNTYTG3TX2WASB6RXQQEJWWXQMQZJSQDQ3HLE5OQC 7NS27QXZMVTZBK4VPMYL5IKGSTTAWR6NDG5SOVITNX44VNIRZPMAC K2K53ZPCIG7VEFW5XQUJOMJJMXVSVRBJPINBZKUBORTMU44R2F4QC D4EL6RSTSZ3S3IDSETRNGLJHZKGZEE2V2OZIOKQK6LRLHQNS77JQC AUTP2GAED3F4BQVCLOPRORNLCFUGTSPW6542OA2BPP7TRXPIFKRAC package toolsimport ("os""path/filepath""testing")func TestDetectAnomalies_LabelMismatch(t *testing.T) {dir := t.TempDir()// Same time range, different calltypes across two modelsdata := `[{"Operator":"test"},` +`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +`{"species":"Kiwi","calltype":"Male","certainty":100,"filter":"model-b"}]]]`if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {t.Fatal(err)}out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})if err != nil {t.Fatal(err)}if out.LabelMismatches != 1 {t.Errorf("expected 1 label mismatch, got %d", out.LabelMismatches)}if out.CertaintyMismatches != 0 {t.Errorf("expected 0 certainty mismatches, got %d", out.CertaintyMismatches)}if out.Anomalies[0].Type != "label_mismatch" {t.Errorf("expected label_mismatch, got %s", out.Anomalies[0].Type)}}func TestDetectAnomalies_CertaintyMismatch(t *testing.T) {dir := t.TempDir()// Same time range, same labels, different certaintydata := `[{"Operator":"test"},` +`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":90,"filter":"model-a"},` +`{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {t.Fatal(err)}out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})if err != nil {t.Fatal(err)}if out.CertaintyMismatches != 1 {t.Errorf("expected 1 certainty mismatch, got %d", out.CertaintyMismatches)}if out.LabelMismatches != 
0 {t.Errorf("expected 0 label mismatches, got %d", out.LabelMismatches)}}func TestDetectAnomalies_NoAnomalyWhenAgreement(t *testing.T) {dir := t.TempDir()data := `[{"Operator":"test"},` +`[0,10,100,1000,[{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-a"},` +`{"species":"Kiwi","calltype":"Duet","certainty":100,"filter":"model-b"}]]]`if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {t.Fatal(err)}out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})if err != nil {t.Fatal(err)}if out.AnomaliesTotal != 0 {t.Errorf("expected 0 anomalies, got %d", out.AnomaliesTotal)}}func TestDetectAnomalies_LonelySegmentSkipped(t *testing.T) {dir := t.TempDir()// model-a has a segment, model-b has no segment in this filedata := `[{"Operator":"test"},` +`[0,10,100,1000,[{"species":"Kiwi","certainty":100,"filter":"model-a"}]]]`if err := os.WriteFile(filepath.Join(dir, "f1.data"), []byte(data), 0644); err != nil {t.Fatal(err)}out, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a", "model-b"}})if err != nil {t.Fatal(err)}if out.AnomaliesTotal != 0 {t.Errorf("lonely segment should be skipped, got %d anomalies", out.AnomaliesTotal)}if out.FilesWithAllModels != 0 {t.Errorf("file missing a model should not count as FilesWithAllModels")}}func TestDetectAnomalies_FailsWithOneModel(t *testing.T) {dir := t.TempDir()_, err := DetectAnomalies(DetectAnomaliesInput{Folder: dir, Models: []string{"model-a"}})if err == nil {t.Error("expected error with only 1 model")}}
package tools

import (
	"fmt"
	"os"
	"path/filepath"

	"skraak/utils"
)

// Anomaly type identifiers reported in Anomaly.Type.
const (
	anomalyTypeLabelMismatch     = "label_mismatch"
	anomalyTypeCertaintyMismatch = "certainty_mismatch"
)

// DetectAnomaliesInput holds the parameters for DetectAnomalies.
type DetectAnomaliesInput struct {
	Folder  string
	Models  []string // at least 2 filter names
	Species []string // optional scope; empty = all species
}

// DetectAnomaliesOutput is the JSON-serialisable result of DetectAnomalies.
type DetectAnomaliesOutput struct {
	Folder              string    `json:"folder"`
	Models              []string  `json:"models"`
	FilesExamined       int       `json:"files_examined"`
	FilesWithAllModels  int       `json:"files_with_all_models"`
	AnomaliesTotal      int       `json:"anomalies_total"`
	LabelMismatches     int       `json:"label_mismatches"`
	CertaintyMismatches int       `json:"certainty_mismatches"`
	Anomalies           []Anomaly `json:"anomalies,omitempty"`
	Error               string    `json:"error,omitempty"`
}

// Anomaly describes one disagreement between models in a single file.
type Anomaly struct {
	File     string           `json:"file"`
	Type     string           `json:"type"` // "label_mismatch" | "certainty_mismatch"
	Segments []AnomalySegment `json:"segments"`
}

// AnomalySegment is one model's view of the time range involved in an Anomaly.
type AnomalySegment struct {
	Model     string  `json:"model"`
	Start     float64 `json:"start"`
	End       float64 `json:"end"`
	Species   string  `json:"species"`
	CallType  string  `json:"calltype,omitempty"`
	Certainty int     `json:"certainty"`
}

// DetectAnomalies compares corresponding segments across multiple ML model filters
// within each .data file. Segments are matched by time overlap (same logic as propagate).
// Lonely segments (no overlap in one or more models) are silently skipped.
// Anomalies are flagged when overlapping segments disagree on species+calltype,
// or when labels match but certainty values differ.
//
// On a validation or listing failure the returned output has its Error field
// set to the same message carried by the returned error. Files that fail to
// parse are skipped and not counted in FilesExamined.
func DetectAnomalies(input DetectAnomaliesInput) (DetectAnomaliesOutput, error) {
	folder := filepath.Clean(input.Folder)
	output := DetectAnomaliesOutput{
		Folder: folder,
		Models: input.Models,
	}

	// fail records msg on the output and returns it as the error as well.
	fail := func(msg string) (DetectAnomaliesOutput, error) {
		output.Error = msg
		return output, fmt.Errorf("%s", msg)
	}

	if len(input.Models) < 2 {
		return fail("at least 2 --model values required")
	}
	seen := make(map[string]struct{}, len(input.Models))
	for _, m := range input.Models {
		if _, dup := seen[m]; dup {
			return fail("duplicate --model values are not allowed")
		}
		seen[m] = struct{}{}
	}

	info, err := os.Stat(input.Folder)
	if err != nil {
		return fail(fmt.Sprintf("folder not found: %s", input.Folder))
	}
	if !info.IsDir() {
		return fail(fmt.Sprintf("not a directory: %s", input.Folder))
	}

	files, err := utils.FindDataFiles(folder)
	if err != nil {
		return fail(fmt.Sprintf("list .data files: %v", err))
	}

	scopeSet := make(map[string]bool, len(input.Species))
	for _, s := range input.Species {
		scopeSet[s] = true
	}

	for _, path := range files {
		df, err := utils.ParseDataFile(path)
		if err != nil {
			// Best effort: an unreadable file is skipped rather than
			// aborting the whole scan.
			continue
		}
		output.FilesExamined++

		anomalies := detectAnomaliesInFile(df, path, input.Models, scopeSet)
		if anomalies == nil {
			// file didn't have all models present
			continue
		}
		output.FilesWithAllModels++

		for _, a := range anomalies {
			if a.Type == anomalyTypeLabelMismatch {
				output.LabelMismatches++
			} else {
				output.CertaintyMismatches++
			}
		}
		output.Anomalies = append(output.Anomalies, anomalies...)
	}

	output.AnomaliesTotal = len(output.Anomalies)
	return output, nil
}

// labeledSeg pairs a segment with the specific label matching the model filter.
type labeledSeg struct {
	seg   *utils.Segment
	label *utils.Label
}

// detectAnomaliesInFile returns nil if the file doesn't contain all required models.
// When every model is present it returns a non-nil slice, which is empty if the
// models agree everywhere; callers rely on that nil/non-nil distinction to tell
// "model missing" apart from "no anomalies".
func detectAnomaliesInFile(df *utils.DataFile, path string, models []string, scope map[string]bool) []Anomaly {
	// Collect ALL labeled segments per model — no scope filtering here.
	// Scope is applied to anchor selection only, so a "Don't Know" label in model[1]
	// against a "Kiwi" anchor in model[0] is correctly surfaced as a label_mismatch.
	modelSegs := make(map[string][]labeledSeg, len(models))
	for _, seg := range df.Segments {
		for _, lbl := range seg.Labels {
			for _, model := range models {
				if lbl.Filter == model {
					modelSegs[model] = append(modelSegs[model], labeledSeg{seg: seg, label: lbl})
					break
				}
			}
		}
	}

	// Skip file if any model is entirely absent.
	for _, model := range models {
		if len(modelSegs[model]) == 0 {
			return nil
		}
	}

	// Deliberately non-nil: nil is reserved for the "model missing" case
	// above, so an anomaly-free file must still return an empty slice.
	anomalies := []Anomaly{}

	// Use models[0] as anchor. Scope filtering applies here only — other models
	// contribute whatever they actually say for the overlapping time range.
	for _, anchor := range modelSegs[models[0]] {
		if len(scope) > 0 && !anchorInScope(anchor.label, scope) {
			continue
		}

		// Build comparison group: anchor + first overlapping match per other model
		// (consistent with propagate's approach). A model with no overlapping
		// segment makes the anchor lonely, and lonely anchors are skipped.
		group := make([]labeledSeg, 0, len(models))
		group = append(group, anchor)
		lonely := false
		for _, model := range models[1:] {
			match, ok := firstOverlap(anchor.seg, modelSegs[model])
			if !ok {
				lonely = true
				break
			}
			group = append(group, match)
		}
		if lonely {
			continue
		}

		if kind := classifyGroup(group); kind != "" {
			anomalies = append(anomalies, Anomaly{File: path, Type: kind, Segments: buildAnomalySegs(group, models)})
		}
	}
	return anomalies
}

// anchorInScope reports whether the label's species, or its species+calltype
// key, is present in scope.
func anchorInScope(label *utils.Label, scope map[string]bool) bool {
	key := label.Species
	if label.CallType != "" {
		key += "+" + label.CallType
	}
	return scope[key] || scope[label.Species]
}

// firstOverlap returns the first candidate whose segment overlaps seg.
func firstOverlap(seg *utils.Segment, candidates []labeledSeg) (labeledSeg, bool) {
	for _, candidate := range candidates {
		if overlaps(seg, candidate.seg) {
			return candidate, true
		}
	}
	return labeledSeg{}, false
}

// classifyGroup compares every member of group against group[0] and returns
// the anomaly type, or "" when species, calltype and certainty all agree.
// A label disagreement takes precedence over a certainty disagreement.
func classifyGroup(group []labeledSeg) string {
	ref := group[0].label
	for _, ls := range group[1:] {
		if ls.label.Species != ref.Species || ls.label.CallType != ref.CallType {
			return anomalyTypeLabelMismatch
		}
	}
	// Labels agree — check certainty.
	for _, ls := range group[1:] {
		if ls.label.Certainty != ref.Certainty {
			return anomalyTypeCertaintyMismatch
		}
	}
	return ""
}

// buildAnomalySegs converts a comparison group into output segments. group[i]
// is attributed to models[i], so the two slices must be in the same order.
func buildAnomalySegs(group []labeledSeg, models []string) []AnomalySegment {
	segs := make([]AnomalySegment, len(group))
	for i, ls := range group {
		segs[i] = AnomalySegment{
			Model:     models[i],
			Start:     ls.seg.StartTime,
			End:       ls.seg.EndTime,
			Species:   ls.label.Species,
			CallType:  ls.label.CallType,
			Certainty: ls.label.Certainty,
		}
	}
	return segs
}

// overlaps returns true if two segments share any time overlap.
// Segments that merely touch at an endpoint do not overlap.
func overlaps(a, b *utils.Segment) bool {
	return a.StartTime < b.EndTime && b.StartTime < a.EndTime
}
package cmd

import (
	"encoding/json"
	"fmt"
	"os"

	"skraak/tools"
)

// printDetectAnomaliesUsage writes the help text for
// "skraak calls detect-anomalies" to stderr.
func printDetectAnomaliesUsage() {
	lines := []string{
		"Usage: skraak calls detect-anomalies [options]\n\n",
		"Compare corresponding segments across ML model filters and flag disagreements.\n",
		"Segments are matched by time overlap. Lonely segments (no overlap in all models) are skipped.\n\n",
		"Options:\n",
		" --folder <path> Folder containing .data files (required)\n",
		" --model <name> Filter name to compare (required, repeat for each model, min 2)\n",
		" --species <name> Scope to species or species+calltype (optional, repeat to add more)\n",
		"\nAnomaly types:\n",
		" label_mismatch Species or calltype disagrees across models\n",
		" certainty_mismatch Labels agree but certainty values differ\n",
		"\nExamples:\n",
		" skraak calls detect-anomalies --folder ./data \\\n",
		" --model opensoundscape-kiwi-1.0 --model opensoundscape-kiwi-1.2\n",
		" skraak calls detect-anomalies --folder ./data \\\n",
		" --model opensoundscape-kiwi-1.0 --model opensoundscape-kiwi-1.2 --model opensoundscape-kiwi-1.5 \\\n",
		" --species Kiwi+Duet --species Kiwi+Male\n",
	}
	for _, line := range lines {
		fmt.Fprint(os.Stderr, line)
	}
}

// detectAnomaliesFlagValue returns the argument that follows the flag at
// args[i]. If the flag is the last argument it prints an error to stderr and
// exits with status 1.
func detectAnomaliesFlagValue(args []string, i int) string {
	if i+1 >= len(args) {
		fmt.Fprintf(os.Stderr, "Error: %s requires a value\n", args[i])
		os.Exit(1)
	}
	return args[i+1]
}

// runCallsDetectAnomalies parses the detect-anomalies command line, runs
// tools.DetectAnomalies, prints a summary to stderr and the full result as
// indented JSON to stdout. Any usage, detection, or output error terminates
// the process with status 1.
func runCallsDetectAnomalies(args []string) {
	var folder string
	var models []string
	var species []string

	for i := 0; i < len(args); {
		arg := args[i]
		switch arg {
		case "--folder":
			folder = detectAnomaliesFlagValue(args, i)
			i += 2
		case "--model":
			models = append(models, detectAnomaliesFlagValue(args, i))
			i += 2
		case "--species":
			species = append(species, detectAnomaliesFlagValue(args, i))
			i += 2
		case "--help", "-h":
			printDetectAnomaliesUsage()
			os.Exit(0)
		default:
			fmt.Fprintf(os.Stderr, "Error: unknown flag: %s\n\n", arg)
			printDetectAnomaliesUsage()
			os.Exit(1)
		}
	}

	if folder == "" {
		fmt.Fprintf(os.Stderr, "Error: --folder is required\n\n")
		printDetectAnomaliesUsage()
		os.Exit(1)
	}
	if len(models) < 2 {
		fmt.Fprintf(os.Stderr, "Error: at least 2 --model values required\n\n")
		printDetectAnomaliesUsage()
		os.Exit(1)
	}

	output, err := tools.DetectAnomalies(tools.DetectAnomaliesInput{
		Folder:  folder,
		Models:  models,
		Species: species,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}

	fmt.Fprintf(os.Stderr, "Examined %d files, %d had all models\n",
		output.FilesExamined, output.FilesWithAllModels)
	fmt.Fprintf(os.Stderr, "Anomalies: %d total (%d label, %d certainty)\n",
		output.AnomaliesTotal, output.LabelMismatches, output.CertaintyMismatches)

	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	// A failed write to stdout (closed pipe, full disk) must not look like
	// success to whatever is consuming the JSON.
	if err := enc.Encode(output); err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}
}
fmt.Fprintf(os.Stderr, " push-certainty Promote certainty=90 segments to 100 for a filtered set\n")
fmt.Fprintf(os.Stderr, " push-certainty Promote certainty=90 segments to 100 for a filtered set\n")fmt.Fprintf(os.Stderr, " detect-anomalies Flag label/certainty disagreements across ML model filters\n")