PDAOL3DWN4OBXEUEQRASZU4HUASFW3QNTIOYNRYJDXC27AWGLMXAC Q4YXRA3T5JUJEAGNLAY2OPFBO6Q6CH43P7KXQKKINZX6RT2PRM3QC IFVRAERTCCDICNTYTG3TX2WASB6RXQQEJWWXQMQZJSQDQ3HLE5OQC OGLLBQQYE5KICDMI6EX7ZI4TZT5RB7UFHH7O2DUOZ44QQXVL5YAAC NAZQZRYQTXWVE2VFY65ONSD6O3EUMNRHARCDVH2D2HKM3YH4RGUAC 47GPFVLW7RWBBHHUZYMEEYWG3KBJBWELR7RDKMJRWMNRWYJUBR7QC 4AFSDSVWQCDWDJEH3DD2S7UUB2LHLQOZLH5SZ6LS4LCSBG4EORXAC DORZF5HSV672ZP5HUDYB3J6TBH5O2LMXJE4HPSE7H5SOGZQBDCXQC DBOROCRFD6A5SJBMFYFEJI5S5M77X4EFEK6KDQWA5QDMQJKIHRWQC RUF5K5CL542GK5UIIIBHPIMGGCXU72IWS5OFBVTI5DRX36OSPJDAC MNDAPW6MTTHAQ2RQFXRD4ZGX64ZM45MEU2CYVSSLWENA73KXZBEAC HHT7M27I3YKGGJOTVTMRVWXATDWUZKIVVLM7IVI7SJRB7FLT2DAQC 2G5WD67ZAM7TUUFLK33AYMNMN7DWWN37I33XU3K2YQUDFUUULKGAC RMWLXG5HGB44LH3CEA7FWTAFPSZQZQGH52OHVQJUDP6ASPVDVQJAC J62FGJ3BGFTUWEOUON4ATYNDFBEUIR6FCIOZSHOVHAA7KGFYHW6AC package utilsimport ("testing")func TestParseSelectionFilename(t *testing.T) {tests := []struct {name stringfilename stringexpectedBase stringexpectedStart float64expectedEnd float64expectError boolerrorContains string}{{name: "Simple filename with integer times",filename: "20250517_214501-102-133.wav",expectedBase: "20250517_214501",expectedStart: 102.0,expectedEnd: 133.0,expectError: false,},{name: "Filename with decimal times",filename: "20250517_214501-102.5-133.7.wav",expectedBase: "20250517_214501",expectedStart: 102.5,expectedEnd: 133.7,expectError: false,},{name: "Filename with prefix and dashes",filename: "A05-20250517_214501-102-133.wav",expectedBase: "A05-20250517_214501",expectedStart: 102.0,expectedEnd: 133.0,expectError: false,},{name: "Filename with multiple dashes in base",filename: "Site-A-05-20250517_214501-102.5-133.7.wav",expectedBase: "Site-A-05-20250517_214501",expectedStart: 102.5,expectedEnd: 133.7,expectError: false,},{name: "PNG extension",filename: "A05-20250517_214501-102-133.png",expectedBase: "A05-20250517_214501",expectedStart: 102.0,expectedEnd: 133.0,expectError: false,},{name: "Zero start time",filename: 
"20250517_214501-0-10.5.wav",expectedBase: "20250517_214501",expectedStart: 0.0,expectedEnd: 10.5,expectError: false,},{name: "Invalid format - too few parts",filename: "20250517_214501-102.wav",expectError: true,errorContains: "expected at least 3 dash-separated parts",},{name: "Invalid format - no dashes",filename: "20250517_214501.wav",expectError: true,errorContains: "expected at least 3 dash-separated parts",},{name: "Invalid start time - not a number",filename: "20250517_214501-abc-133.wav",expectError: true,errorContains: "invalid start time",},{name: "Invalid end time - not a number",filename: "20250517_214501-102-xyz.wav",expectError: true,errorContains: "invalid end time",},{name: "Start time equals end time",filename: "20250517_214501-102-102.wav",expectError: true,errorContains: "start time",},{name: "Start time greater than end time",filename: "20250517_214501-133-102.wav",expectError: true,errorContains: "start time",},}for _, tt := range tests {t.Run(tt.name, func(t *testing.T) {base, start, end, err := ParseSelectionFilename(tt.filename)if tt.expectError {if err == nil {t.Errorf("Expected error containing '%s', got nil", tt.errorContains)} else if tt.errorContains != "" && !containsString(err.Error(), tt.errorContains) {t.Errorf("Expected error containing '%s', got: %v", tt.errorContains, err)}return}if err != nil {t.Errorf("Unexpected error: %v", err)return}if base != tt.expectedBase {t.Errorf("Expected base '%s', got '%s'", tt.expectedBase, base)}if start != tt.expectedStart {t.Errorf("Expected start %.2f, got %.2f", tt.expectedStart, start)}if end != tt.expectedEnd {t.Errorf("Expected end %.2f, got %.2f", tt.expectedEnd, end)}})}}func TestParseMLFolderName(t *testing.T) {tests := []struct {name stringfolderName stringexpectedFilterName stringexpectedDate stringexpectError boolerrorContains string}{{name: "Standard folder name",folderName: "Clips_opensoundscape-kiwi-1.0_2025-11-14",expectedFilterName: "opensoundscape-kiwi-1.0",expectedDate: 
"2025-11-14",expectError: false,},{name: "Filter name with underscores",folderName: "Clips_my_model_v2_2025-01-15",expectedFilterName: "my_model_v2",expectedDate: "2025-01-15",expectError: false,},{name: "Simple filter name",folderName: "Clips_kiwi_2024-12-31",expectedFilterName: "kiwi",expectedDate: "2024-12-31",expectError: false,},{name: "Missing Clips_ prefix",folderName: "opensoundscape-kiwi-1.0_2025-11-14",expectError: true,errorContains: "invalid ML folder name format",},{name: "Missing date",folderName: "Clips_opensoundscape-kiwi-1.0",expectError: true,errorContains: "invalid ML folder name format",},{name: "Invalid date format",folderName: "Clips_opensoundscape-kiwi-1.0_11-14-2025",expectError: true,errorContains: "invalid ML folder name format",},{name: "Missing filter name",folderName: "Clips__2025-11-14",expectError: true,errorContains: "invalid ML folder name format",},{name: "Extra underscores at end",folderName: "Clips_kiwi_2025-11-14_extra",expectError: true,errorContains: "invalid ML folder name format",},}for _, tt := range tests {t.Run(tt.name, func(t *testing.T) {filterName, date, err := ParseMLFolderName(tt.folderName)if tt.expectError {if err == nil {t.Errorf("Expected error containing '%s', got nil", tt.errorContains)} else if tt.errorContains != "" && !containsString(err.Error(), tt.errorContains) {t.Errorf("Expected error containing '%s', got: %v", tt.errorContains, err)}return}if err != nil {t.Errorf("Unexpected error: %v", err)return}if filterName != tt.expectedFilterName {t.Errorf("Expected filter name '%s', got '%s'", tt.expectedFilterName, filterName)}if date != tt.expectedDate {t.Errorf("Expected date '%s', got '%s'", tt.expectedDate, date)}})}}func TestValidateWAVPNGPairs(t *testing.T) {tests := []struct {name stringwavFiles []stringpngFiles []stringexpectedPaired []stringexpectedMismatched []string}{{name: "All files paired",wavFiles: []string{"file1-10-20.wav", "file2-30-40.wav"},pngFiles: []string{"file1-10-20.png", 
"file2-30-40.png"},expectedPaired: []string{"file1-10-20", "file2-30-40"},expectedMismatched: []string{},},{name: "Missing PNG for one WAV",wavFiles: []string{"file1-10-20.wav", "file2-30-40.wav", "file3-50-60.wav"},pngFiles: []string{"file1-10-20.png", "file2-30-40.png"},expectedPaired: []string{"file1-10-20", "file2-30-40"},expectedMismatched: []string{"file3-50-60.wav"},},{name: "Missing all PNGs",wavFiles: []string{"file1-10-20.wav", "file2-30-40.wav"},pngFiles: []string{},expectedPaired: []string{},expectedMismatched: []string{"file1-10-20.wav", "file2-30-40.wav"},},{name: "Extra PNG files (no WAV)",wavFiles: []string{"file1-10-20.wav"},pngFiles: []string{"file1-10-20.png", "file2-30-40.png", "file3-50-60.png"},expectedPaired: []string{"file1-10-20"},expectedMismatched: []string{},},{name: "No files",wavFiles: []string{},pngFiles: []string{},expectedPaired: []string{},expectedMismatched: []string{},},}for _, tt := range tests {t.Run(tt.name, func(t *testing.T) {paired, mismatched := ValidateWAVPNGPairs(tt.wavFiles, tt.pngFiles)if !stringSlicesEqual(paired, tt.expectedPaired) {t.Errorf("Expected paired %v, got %v", tt.expectedPaired, paired)}if !stringSlicesEqual(mismatched, tt.expectedMismatched) {t.Errorf("Expected mismatched %v, got %v", tt.expectedMismatched, mismatched)}})}}func TestExtractDateTimePattern(t *testing.T) {tests := []struct {name stringfilename stringexpectedPattern stringexpectedFound bool}{{name: "8-digit date (YYYYMMDD)",filename: "20250517_214501",expectedPattern: "20250517_214501",expectedFound: true,},{name: "8-digit date with prefix",filename: "A05-20250517_214501",expectedPattern: "20250517_214501",expectedFound: true,},{name: "8-digit date with suffix",filename: "20250517_214501-extra",expectedPattern: "20250517_214501",expectedFound: true,},{name: "6-digit date (YYMMDD or DDMMYY)",filename: "250517_214501",expectedPattern: "250517_214501",expectedFound: true,},{name: "6-digit date with prefix",filename: 
"Site-170525_214501",expectedPattern: "170525_214501",expectedFound: true,},{name: "No date pattern",filename: "file_without_date",expectedPattern: "",expectedFound: false,},{name: "Incomplete pattern (missing time)",filename: "20250517",expectedPattern: "",expectedFound: false,},{name: "Incomplete pattern (missing date)",filename: "214501",expectedPattern: "",expectedFound: false,},{name: "Wrong separator",filename: "20250517-214501",expectedPattern: "",expectedFound: false,},}for _, tt := range tests {t.Run(tt.name, func(t *testing.T) {pattern, found := ExtractDateTimePattern(tt.filename)if found != tt.expectedFound {t.Errorf("Expected found=%v, got found=%v", tt.expectedFound, found)}if pattern != tt.expectedPattern {t.Errorf("Expected pattern '%s', got '%s'", tt.expectedPattern, pattern)}})}}// Helper functionsfunc containsString(s, substr string) bool {return len(substr) == 0 || (len(s) > 0 && len(substr) > 0 &&len(s) >= len(substr) && contains(s, substr))}func contains(s, substr string) bool {for i := 0; i <= len(s)-len(substr); i++ {if s[i:i+len(substr)] == substr {return true}}return false}func stringSlicesEqual(a, b []string) bool {if len(a) != len(b) {return false}if len(a) == 0 {return true}for i := range a {if a[i] != b[i] {return false}}return true}
package utils

import (
	"fmt"
	"math"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

// Patterns used by the parsers below. They are compiled once at package
// initialisation instead of on every call.
var (
	// mlFolderNameRegex matches Clips_{filter_name}_{YYYY-MM-DD}. The greedy
	// filter-name group takes everything up to the final underscore+date.
	mlFolderNameRegex = regexp.MustCompile(`^Clips_(.+)_(\d{4}-\d{2}-\d{2})$`)
	// dateTime8Regex matches 8 digits + underscore + 6 digits (YYYYMMDD_HHMMSS).
	dateTime8Regex = regexp.MustCompile(`\d{8}_\d{6}`)
	// dateTime6Regex matches 6 digits + underscore + 6 digits
	// (YYMMDD_HHMMSS or DDMMYY_HHMMSS).
	dateTime6Regex = regexp.MustCompile(`\d{6}_\d{6}`)
)

// ParseSelectionFilename parses a selection filename to extract the base
// filename and time bounds.
//
// Format: {base_filename}-{start_seconds}-{end_seconds}.{ext}
// Example: "A05-20250517_214501-102.5-133.7.wav" -> ("A05-20250517_214501", 102.5, 133.7, nil)
//
// The extension is whatever filepath.Ext reports (the text from the final
// dot), so a name passed without an extension but with a decimal end time
// (e.g. "x-1-2.5") has ".5" stripped as if it were the extension.
//
// An error is returned when the name has fewer than three dash-separated
// parts, when either time is not a finite number, or when start is not
// strictly less than end. On error the other results are zero values.
func ParseSelectionFilename(filename string) (baseFilename string, startS, endS float64, err error) {
	nameWithoutExt := strings.TrimSuffix(filename, filepath.Ext(filename))

	// The base name may itself contain dashes, so only the last two
	// dash-separated parts are interpreted as times.
	parts := strings.Split(nameWithoutExt, "-")
	if len(parts) < 3 {
		return "", 0, 0, fmt.Errorf("invalid selection filename format (expected at least 3 dash-separated parts): %s", filename)
	}
	startTimeStr := parts[len(parts)-2]
	endTimeStr := parts[len(parts)-1]

	startS, err = parseFiniteSeconds(startTimeStr)
	if err != nil {
		return "", 0, 0, fmt.Errorf("invalid start time '%s': %w", startTimeStr, err)
	}
	endS, err = parseFiniteSeconds(endTimeStr)
	if err != nil {
		return "", 0, 0, fmt.Errorf("invalid end time '%s': %w", endTimeStr, err)
	}

	if startS >= endS {
		return "", 0, 0, fmt.Errorf("start time (%.2f) must be less than end time (%.2f)", startS, endS)
	}

	// Everything before the two time parts is the base filename.
	baseFilename = strings.Join(parts[:len(parts)-2], "-")
	return baseFilename, startS, endS, nil
}

// parseFiniteSeconds parses s as a float64 and rejects NaN and infinities.
// strconv.ParseFloat accepts the strings "NaN" and "Inf", and NaN makes every
// ordering comparison false, so without this check such values would slip
// past the start < end validation.
func parseFiniteSeconds(s string) (float64, error) {
	v, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0, err
	}
	if math.IsNaN(v) || math.IsInf(v, 0) {
		return 0, fmt.Errorf("not a finite number")
	}
	return v, nil
}

// ParseMLFolderName parses the root folder name to extract the filter name
// and date.
//
// Format: Clips_{filter_name}_{YYYY-MM-DD}
// Example: "Clips_opensoundscape-kiwi-1.0_2025-11-14" -> ("opensoundscape-kiwi-1.0", "2025-11-14", nil)
//
// The date is only checked for its digit layout, not for calendar validity.
// An error is returned when folderName does not match the format.
func ParseMLFolderName(folderName string) (filterName string, date string, err error) {
	matches := mlFolderNameRegex.FindStringSubmatch(folderName)
	if len(matches) != 3 {
		return "", "", fmt.Errorf("invalid ML folder name format (expected 'Clips_{filter_name}_{YYYY-MM-DD}'): %s", folderName)
	}
	return matches[1], matches[2], nil
}

// ValidateWAVPNGPairs checks that each WAV file has a corresponding PNG file.
//
// Files are matched on their name with the extension (as reported by
// filepath.Ext) removed. It returns the base names of WAV files that have a
// PNG, and the full names of WAV files that do not, both in the order of
// wavFiles. PNG files without a WAV are not reported. Either result is nil
// when it has no entries.
func ValidateWAVPNGPairs(wavFiles, pngFiles []string) (paired []string, mismatched []string) {
	// Set of PNG base names for quick lookup.
	pngBases := make(map[string]struct{}, len(pngFiles))
	for _, pngFile := range pngFiles {
		pngBases[strings.TrimSuffix(pngFile, filepath.Ext(pngFile))] = struct{}{}
	}

	for _, wavFile := range wavFiles {
		baseName := strings.TrimSuffix(wavFile, filepath.Ext(wavFile))
		if _, ok := pngBases[baseName]; ok {
			paired = append(paired, baseName)
		} else {
			mismatched = append(mismatched, wavFile)
		}
	}
	return paired, mismatched
}

// ExtractDateTimePattern extracts the date_time pattern from a filename.
//
// It looks for YYYYMMDD_HHMMSS (8 digits) first and falls back to
// YYMMDD_HHMMSS / DDMMYY_HHMMSS (6 digits). It returns the leftmost match and
// true, or "" and false when neither pattern occurs. Only digit counts are
// checked; the digits are not validated as a real date or time.
//
// Example: "A05-20250517_214501" -> ("20250517_214501", true)
func ExtractDateTimePattern(filename string) (pattern string, found bool) {
	if match := dateTime8Regex.FindString(filename); match != "" {
		return match, true
	}
	if match := dateTime6Regex.FindString(filename); match != "" {
		return match, true
	}
	return "", false
}

// ExtractDateTimePattern extracts 6-digit or 8-digit date patterns from filenames
// ValidateWAVPNGPairs checks that WAV and PNG files are properly paired for ML selections
// ParseMLFolderName extracts filter name and date from ML folder structure
// ParseSelectionFilename extracts base filename and time bounds from ML selection filename
}})}}func TestValidateLongID(t *testing.T) {tests := []struct {name stringid stringfieldName stringwantErr bool}{{"valid 21-char ID", "abc123XYZ789abc123XYZ", "test_id", false}, // exactly 21 chars{"valid with underscore", "abc_123_XYZ_789_abc_X", "test_id", false}, // exactly 21 chars{"empty string", "", "test_id", true},{"too short", "abc123XYZ789", "test_id", true}, // 12 chars{"too long", "abc123XYZ789abc123XYZ789ex", "test_id", true}, // 24 chars{"invalid chars", "abc@123#XYZ$789%abc^XY", "test_id", true},}for _, tt := range tests {t.Run(tt.name, func(t *testing.T) {err := ValidateLongID(tt.id, tt.fieldName)if (err != nil) != tt.wantErr {t.Errorf("ValidateLongID() error = %v, wantErr %v", err, tt.wantErr)
}})}}func TestValidateHash(t *testing.T) {tests := []struct {name stringhash stringwantErr bool}{{"valid hash", "0123456789abcdef", false},{"valid all letters", "abcdefabcdefabcd", false},{"valid all numbers", "1234567890123456", false},{"too short", "0123456789abcde", true},{"too long", "0123456789abcdef0", true},{"invalid chars", "ghijklmnopqrstuv", true},{"uppercase", "ABCDEF1234567890", true},{"mixed case", "aBcDeF1234567890", true},{"empty", "", true},}for _, tt := range tests {t.Run(tt.name, func(t *testing.T) {err := ValidateHash(tt.hash, "test_hash")if (err != nil) != tt.wantErr {t.Errorf("ValidateHash() error = %v, wantErr %v", err, tt.wantErr)}})}}func TestValidateFrequencyRange(t *testing.T) {low := func(v float64) *float64 { return &v }high := func(v float64) *float64 { return &v }tests := []struct {name stringfreqLow *float64freqHigh *float64wantErr bool}{{"both nil", nil, nil, false},{"valid range", low(100.0), high(1000.0), false},{"valid low only", low(100.0), nil, false},{"valid high only", nil, high(1000.0), false},{"at bounds", low(0.0), high(299999.0), false},{"low negative", low(-1.0), high(1000.0), true},{"high too high", low(100.0), high(300000.0), true},{"low equals high", low(500.0), high(500.0), true},{"low greater than high", low(1000.0), high(100.0), true},}for _, tt := range tests {t.Run(tt.name, func(t *testing.T) {err := ValidateFrequencyRange(tt.freqLow, tt.freqHigh)if (err != nil) != tt.wantErr {t.Errorf("ValidateFrequencyRange() error = %v, wantErr %v", err, tt.wantErr)
func TestValidateCertainty(t *testing.T) {val := func(v float64) *float64 { return &v }tests := []struct {name stringcertainty *float64wantErr bool}{{"nil", nil, false},{"zero", val(0.0), false},{"mid range", val(50.0), false},{"100", val(100.0), false},{"negative", val(-0.1), true},{"over 100", val(100.1), true},{"much over", val(200.0), true},}for _, tt := range tests {t.Run(tt.name, func(t *testing.T) {err := ValidateCertainty(tt.certainty)if (err != nil) != tt.wantErr {t.Errorf("ValidateCertainty() error = %v, wantErr %v", err, tt.wantErr)}})}}
// ValidateLongID validates 21-character nanoid formatfunc ValidateLongID(id, fieldName string) error {if id == "" {return fmt.Errorf("%s cannot be empty", fieldName)}if len(id) != LongIDLen {return fmt.Errorf("%s must be exactly %d characters (got %d)", fieldName, LongIDLen, len(id))}if !longIDRegex.MatchString(id) {return fmt.Errorf("%s has invalid format (expected alphanumeric nanoid)", fieldName)}return nil}
// ValidateHash validates XXH64 hash format (16 hex characters)func ValidateHash(hash, fieldName string) error {if len(hash) != HashLen {return fmt.Errorf("%s must be exactly %d characters (got %d)", fieldName, HashLen, len(hash))}if !hashRegex.MatchString(hash) {return fmt.Errorf("%s has invalid format (expected 16 hex characters)", fieldName)}return nil}// ValidateFrequencyRange validates frequency bounds for selectionsfunc ValidateFrequencyRange(freqLow, freqHigh *float64) error {if freqLow == nil && freqHigh == nil {return nil}// Schema limit is 300000 Hzconst maxFreq = 300000.0if freqLow != nil {if *freqLow < 0 {return fmt.Errorf("freq_low must be non-negative (got %v)", *freqLow)}if *freqLow >= maxFreq {return fmt.Errorf("freq_low must be less than %v Hz (got %v)", maxFreq, *freqLow)}}if freqHigh != nil {if *freqHigh < 0 {return fmt.Errorf("freq_high must be non-negative (got %v)", *freqHigh)}if *freqHigh >= maxFreq {return fmt.Errorf("freq_high must be less than %v Hz (got %v)", maxFreq, *freqHigh)}}// If both provided, low must be less than highif freqLow != nil && freqHigh != nil && *freqLow >= *freqHigh {return fmt.Errorf("freq_low (%v) must be less than freq_high (%v)", *freqLow, *freqHigh)}return nil}// ValidateCertainty validates certainty percentage (0-100)func ValidateCertainty(certainty *float64) error {if certainty == nil {return nil}if *certainty < 0 || *certainty > 100 {return fmt.Errorf("certainty must be between 0 and 100 (got %v)", *certainty)}return nil}
func NewClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile) *ClassifyState {hasFilter := config.Filter != "" || config.Species != "" || config.Certainty >= 0cached := make([][]*utils.Segment, len(dataFiles))for i, df := range dataFiles {if !hasFilter {cached[i] = df.Segments} else {for _, seg := range df.Segments {if seg.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {cached[i] = append(cached[i], seg)}}}}total := 0for _, segs := range cached {total += len(segs)}return &ClassifyState{Config: config,DataFiles: dataFiles,filteredSegs: cached,totalSegs: total,}}
// NewClassifyState creates a ClassifyState with pre-computed filtered segments.// Used by tests that construct state directly without LoadDataFiles.func NewClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile) *ClassifyState {hasFilter := config.Filter != "" || config.Species != "" || config.Certainty >= 0cached := make([][]*utils.Segment, len(dataFiles))for i, df := range dataFiles {if !hasFilter {cached[i] = df.Segments} else {for _, seg := range df.Segments {if seg.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {cached[i] = append(cached[i], seg)}}}}total := 0for _, segs := range cached {total += len(segs)}return &ClassifyState{Config: config,DataFiles: dataFiles,filteredSegs: cached,totalSegs: total,}}
// GetTableRowCount returns the number of rows in a table.
//
// The table name is interpolated directly into the SQL text (identifiers
// cannot be bound as query parameters), so callers must pass only trusted
// table names. On failure it returns 0 and an error wrapping the cause.
func GetTableRowCount(db *sql.DB, table string) (int64, error) {
	query := fmt.Sprintf("SELECT COUNT(*) FROM %s", table)

	var rows int64
	if err := db.QueryRow(query).Scan(&rows); err != nil {
		return 0, fmt.Errorf("failed to count rows in %s: %w", table, err)
	}
	return rows, nil
}