V2HX6HEB2OBNI4IMWD5XJN3RKAZYHAFJAJAPFP3BFYFZVZVEYN6AC BGJRP6EHV4XU4IUTBFMGKFS6Z7ECYVKG7DLXDGOKXTV5663OXR6AC 2MZO5RDB67LA3ZM752XPJTN3IYGJL3M4UXGPUQVPVQ3CT2ZLFRFAC TSPKDAFW3LRT5UXGNSJ2MOLSILLIGUHUVS7S2CLAWRZFMVBBMD4AC XU7FTYK3YAM5TADBGEJZ44UJ6LNJ7YVABSFEXKOBGUHXBFHVLR2AC PXQDGTR53ST5T4EV6XFRCAOC7N5RQX23GWVKMJGS2J35VUQLZL4AC KZKLAINJJWZ64T5MUZT34LJVQIKBTKZ6EJGD7C7TTSSDGCHEDPMAC JZRF7OBJNERB4NIB37RSAF3ZK2A4RBWSCFV5OCRXZYVGPSNOWKTAC 43TMU2JOAE2HIWKUUPSK5LP7KLGLBVZIZHTFA43ZAXDOHU4XWZ5QC FCCJNYCVGOW6WVHYUUQ3RHHKB4QU3S4LU4AINB4VKWBOXLCPXILQC LBWQJEDHCNUNMEJWXILGBGYZUKQI7CDAMH2BD44HULM77SVH5UYQC NS4TDPLNAWJYJN37PZDYXMG6OJSAWZCMTPSPKX73JCLZZAMY25BAC ZDZDASRTTRPJRIBAMNO3TB533XFELVYJQQGAMA3WOQYY35SVAQUQC QVIGQOQZIEXLFMMAA7RTL7MQWI4MC3CH22R6YO6J7LGLHWLCSD4AC 54GPBNIXPKRBLD6GS4W6PTZTSQEO4M5HFEGMCVMBZSUFB57VGODAC LQLC7S3ADBR4O2JYVUSQJD65U3HG4ADOQBGB4F7KQCXUMNKMNEKAC JAT3DXOLENZZGXE2NYFF3TVQAQIXMMNYO234ETKQGC2CRHJVZERQC E27ZWCDPESXDEHYZONCAKYL2U4K4ZLVXWX4453ICWSH4TGMQI4KQC YVFPP5VJJSR4EVGOMB5565IZFYAVFNRH37L6AXEUFU5AP6CL7DJAC N57PNZPFM6QU5FK4SHC3473IV6IN3HRVKSPZSFIJJ5LLCAXICNIAC 3ETJ6KPIYI23DLXSKISNJSY3DUGHOACE6CPCPF6V7KJK4EXIADQAC NQPVZ3PPQG6EPTTAEHXOXXGK27HZCISHZCOZU6K6RKWTRTOHMY6QC TSOJUMHVLPASHBAVCTUK6WSGZOSBDZIC47FYILGQ2QAU7Z4BUZMAC 3DVPQOKB6BX63XSBIYYCPWBL2RBG3LXZS3XPQBANJP2FWVRAOVZQC DHIPFBFPF4F7SLMMBVHO4TUFPLFYWY4KGOYZEHOQGUQNZELRSW3AC O45G7VX2XBX2JSKKVK42BLU4UG5K77WPZOD2IYLKK5MOTSPXFFMAC VU3KBTQ6AFJV36WVQ4A7BM7Q3MLJQX4DBCGMIZJXNPMEKLIBGHZAC M34GDDTWJ5E2N2SMNO5BENI5T6IWZNDZ23ATHGBAXUGXOK2EQQ4AC SJN7IKIVTAZX3ACEWPLFVUT7P2TLB3RQBD4PKC6PEQQ33ECXFJRQC 2P27XV3DGJCRA4SNJENCJYZLPR2XWZMTY7CGYYSJOY4UMDVVO25AC QFPEKXL5OUKLT4WECMATSOHWYM24QPHKS6WZAAI5BAEQSAGAK6CQC I4CMOMXFJ3Y4AY5LPA7MDLWVHJ674IRFYLXCEXCC5ZARLCWSKCAAC package wavimport ("testing""time")type expectedTS struct {Year, Month, Day, Hour, Minute, Second int}func assertTimestamp(t *testing.T, got time.Time, want expectedTS) {t.Helper()t.Helper()if got.Year() != want.Year {t.Errorf("Year: got %d, want %d", got.Year(), 
want.Year)}if got.Month() != time.Month(want.Month) {t.Errorf("Month: got %d, want %d", got.Month(), want.Month)}if got.Day() != want.Day {t.Errorf("Day: got %d, want %d", got.Day(), want.Day)}if got.Hour() != want.Hour {t.Errorf("Hour: got %d, want %d", got.Hour(), want.Hour)}if got.Minute() != want.Minute {t.Errorf("Minute: got %d, want %d", got.Minute(), want.Minute)}if got.Second() != want.Second {t.Errorf("Second: got %d, want %d", got.Second(), want.Second)}}func assertOffset(t *testing.T, got time.Time, wantSeconds int) {t.Helper()_, offset := got.Zone()if offset != wantSeconds {t.Errorf("Offset: got %d seconds, want %d seconds", offset, wantSeconds)}}// parseAndApply is a test helper that parses filenames and applies a timezone offset.func parseAndApply(t *testing.T, filenames []string, tz string) []time.Time {t.Helper()parsed, err := ParseFilenameTimestamps(filenames)if err != nil {t.Fatalf("Failed to parse filenames: %v", err)}results, err := ApplyTimezoneOffset(parsed, tz)if err != nil {t.Fatalf("Failed to apply timezone: %v", err)}return results}// parseTestCase defines a table-driven test case for ParseFilenameTimestamps.type parseTestCase struct {name stringfiles []stringexpected map[int]expectedTS // index → expected timestamp}func runParseTestCase(t *testing.T, tc parseTestCase) {t.Helper()results, err := ParseFilenameTimestamps(tc.files)if err != nil {t.Fatalf("Failed to parse filenames: %v", err)}if len(results) != len(tc.files) {t.Fatalf("Expected %d results, got %d", len(tc.files), len(results))}for idx, want := range tc.expected {assertTimestamp(t, results[idx].Timestamp, want)}}func TestParseFilenameTimestamps(t *testing.T) {cases := []parseTestCase{{name: "YYMMDD format (test case a)",files: []string{"201012_123456.wav", "201014_123456.WAV", "201217_123456.wav", "211122_123456.WAV"},expected: map[int]expectedTS{0: {2020, 10, 12, 12, 34, 56}, // Year 20 → 20203: {2021, 11, 22, 12, 34, 56},},},{name: "DDMMYY format (test case b)",files: 
[]string{"121020_123456.WAV", "141020_123456.wav", "171220_123456.WAV", "221121_123456.wav"},expected: map[int]expectedTS{0: {2020, 10, 12, 12, 34, 56},2: {2020, 12, 17, 12, 34, 56},},},{name: "YYYYMMDD format (test case c)",files: []string{"20230609_103000.WAV", "20241109_201504.wav"},expected: map[int]expectedTS{0: {2023, 6, 9, 10, 30, 0},1: {2024, 11, 9, 20, 15, 4},},},{name: "6-digit with variance detection (test case d)",files: []string{"120119_003002.wav", "180120_231502.wav", "170122_010005.wav", "010419_234502.WAV", "310320_231502.wav", "220824_231502.WAV", "240123_231502.wav"},expected: map[int]expectedTS{0: {2019, 1, 12, 0, 30, 2}, // DDMMYY4: {2020, 3, 31, 23, 15, 2},},},{name: "prefixes (test case e)",files: []string{"XYZ123_7689_20230609_103000.WAV", "string 20241109_201504.wav"},expected: map[int]expectedTS{0: {2023, 6, 9, 10, 30, 0},1: {2024, 11, 9, 20, 15, 4},},},{name: "complex prefixes (test case f)",files: []string{"abcdefg__1234_180120_231502.wav", "string 120119_003002.wav", "ABCD EFG___170122_010005.wav", "BHD_1234 010419_234502.WAV", "cill xyz 310320_231502.wav", "220824_231502.WAV", "240123_231502.wav"},expected: map[int]expectedTS{0: {2020, 1, 18, 23, 15, 2},1: {2019, 1, 12, 0, 30, 2},4: {2020, 3, 31, 23, 15, 2},},},}for _, tc := range cases {t.Run(tc.name, func(t *testing.T) {runParseTestCase(t, tc)})}}func TestParseFilenameTimestampsErrors(t *testing.T) {t.Run("should throw error for empty filename array", func(t *testing.T) {_, err := ParseFilenameTimestamps([]string{})if err == nil {t.Error("Expected error for empty filename array")}if err != nil && err.Error() != "no filenames provided" {t.Logf("Error message: %v", err)}})t.Run("should throw error for filenames without date patterns", func(t *testing.T) {_, err := ParseFilenameTimestamps([]string{"invalid_filename.wav"})if err == nil {t.Error("Expected error for filenames without date patterns")}})t.Run("should throw error for mixed date formats", func(t *testing.T) {mixedFormats := 
[]string{"201012_123456.wav", "20231012_123456.wav"} // 6-digit vs 8-digit_, err := ParseFilenameTimestamps(mixedFormats)if err == nil {t.Error("Expected error for mixed date formats")}})t.Run("should throw error for wrong length patterns", func(t *testing.T) {wrongLength := []string{"2010_123456.wav"} // 4 digits instead of 6 or 8_, err := ParseFilenameTimestamps(wrongLength)if err == nil {t.Error("Expected error for wrong length patterns")}})t.Run("should throw error when not enough files for 6-digit disambiguation", func(t *testing.T) {singleFile := []string{"120119_003002.wav"}_, err := ParseFilenameTimestamps(singleFile)if err == nil {t.Error("Expected error when not enough files for 6-digit disambiguation")}})}func TestApplyTimezoneOffset(t *testing.T) {t.Run("should apply UTC timezone correctly", func(t *testing.T) {results := parseAndApply(t, []string{"201012_123456.wav", "201014_123456.WAV"}, "UTC")if len(results) != 2 {t.Fatalf("Expected 2 results, got %d", len(results))}assertOffset(t, results[0], 0)})t.Run("should use fixed offset for entire cluster spanning DST transition", func(t *testing.T) {// Auckland DST ended April 4, 2021 (UTC+13 -> UTC+12)results := parseAndApply(t, []string{"20210401_120000.wav", // April 1st - DST active (UTC+13)"20210410_120000.wav", // April 10th - DST ended (would be UTC+12 if DST applied)"20210420_120000.wav", // April 20th - Standard time}, "Pacific/Auckland")if len(results) != 3 {t.Fatalf("Expected 3 results, got %d", len(results))}// All files should use UTC+13 offset (from earliest file: April 1st)for _, r := range results {assertOffset(t, r, 13*3600)}// All at 12:00 local - 13h = 23:00 UTC previous dayassertTimestamp(t, results[0].UTC(), expectedTS{2021, 3, 31, 23, 0, 0})assertTimestamp(t, results[1].UTC(), expectedTS{2021, 4, 9, 23, 0, 0})assertTimestamp(t, results[2].UTC(), expectedTS{2021, 4, 19, 23, 0, 0})})t.Run("should handle out-of-order filenames correctly", func(t *testing.T) {results := parseAndApply(t, 
[]string{"20210410_120000.wav", // April 10th (later)"20210401_120000.wav", // April 1st (earliest - determines offset)"20210405_120000.wav", // April 5th (middle)}, "Pacific/Auckland")// All files use UTC+13 (from April 1st, the earliest)for _, r := range results {assertOffset(t, r, 13*3600)}// Results maintain original filename orderassertTimestamp(t, results[0], expectedTS{2021, 4, 10, 12, 0, 0})assertTimestamp(t, results[1], expectedTS{2021, 4, 1, 12, 0, 0})assertTimestamp(t, results[2], expectedTS{2021, 4, 5, 12, 0, 0})})t.Run("should apply fixed offset consistently across large time spans", func(t *testing.T) {results := parseAndApply(t, []string{"20210215_120000.wav", // February (summer, UTC+13)"20210615_120000.wav", // June (winter, would be UTC+12 if DST applied)"20210815_120000.wav", // August (winter)}, "Pacific/Auckland")// All files use offset from earliest (February): UTC+13for _, r := range results {assertOffset(t, r, 13*3600)}// 12:00 local - 13h = 23:00 UTC previous dayassertTimestamp(t, results[0].UTC(), expectedTS{2021, 2, 14, 23, 0, 0})assertTimestamp(t, results[1].UTC(), expectedTS{2021, 6, 14, 23, 0, 0})assertTimestamp(t, results[2].UTC(), expectedTS{2021, 8, 14, 23, 0, 0})})t.Run("should handle US DST transitions with fixed offset", func(t *testing.T) {results := parseAndApply(t, []string{"20210310_120000.wav", // March 10th - before DST (UTC-5)"20210320_120000.wav", // March 20th - after DST (would be UTC-4)}, "America/New_York")// All files use offset from earliest (March 10th): UTC-5for _, r := range results {assertOffset(t, r, -5*3600)}// 12:00 local + 5h = 17:00 UTCassertTimestamp(t, results[0].UTC(), expectedTS{2021, 3, 10, 17, 0, 0})assertTimestamp(t, results[1].UTC(), expectedTS{2021, 3, 20, 17, 0, 0})})t.Run("should handle empty timestamps array", func(t *testing.T) {_, err := ApplyTimezoneOffset([]FilenameTimestamp{}, "UTC")if err == nil {t.Error("Expected error for empty timestamps array")}})t.Run("should handle invalid timezone", 
func(t *testing.T) {filenames := []string{"20210401_120000.wav"}parsed, err := ParseFilenameTimestamps(filenames)if err != nil {t.Fatalf("Failed to parse filenames: %v", err)}_, err = ApplyTimezoneOffset(parsed, "Invalid/Timezone")if err == nil {t.Error("Expected error for invalid timezone")}})}func TestHasTimestampFilename(t *testing.T) {testCases := []struct {filename stringexpected bool}{{"201012_123456.wav", true},{"20230609_103000.WAV", true},{"invalid_filename.wav", false},{"201012_123456.txt", false},{"201012.wav", false},{"_123456.wav", false},{"", false},}for _, tc := range testCases {t.Run(tc.filename, func(t *testing.T) {result := HasTimestampFilename(tc.filename)if result != tc.expected {t.Errorf("HasTimestampFilename(%q) = %v, want %v", tc.filename, result, tc.expected)}})}}func TestFilenameParserEdgeCases(t *testing.T) {t.Run("should handle case-insensitive file extensions", func(t *testing.T) {filenames := []string{"201012_123456.wav","201014_123456.WAV","201217_123456.Wav",}results, err := ParseFilenameTimestamps(filenames)if err != nil {t.Fatalf("Failed to parse filenames: %v", err)}if len(results) != 3 {t.Errorf("Expected 3 results, got %d", len(results))}})t.Run("should validate invalid dates", func(t *testing.T) {// 32nd day doesn't exist - should be caught by validationfilenames := []string{"20240132_120000.wav"}_, err := ParseFilenameTimestamps(filenames)if err == nil {t.Error("Expected error for invalid date (day 32)")}})t.Run("should validate invalid months", func(t *testing.T) {// 13th month doesn't existfilenames := []string{"20241301_120000.wav"}_, err := ParseFilenameTimestamps(filenames)if err == nil {t.Error("Expected error for invalid month (13)")}})t.Run("should handle February 29th in leap year", func(t *testing.T) {filenames := []string{"20240229_120000.wav"} // 2024 is a leap yearresults, err := ParseFilenameTimestamps(filenames)if err != nil {t.Fatalf("Failed to parse leap year date: %v", err)}if results[0].Timestamp.Day() != 29 
{t.Errorf("Expected day 29, got %d", results[0].Timestamp.Day())}})t.Run("should reject February 29th in non-leap year", func(t *testing.T) {filenames := []string{"20230229_120000.wav"} // 2023 is not a leap year_, err := ParseFilenameTimestamps(filenames)if err == nil {t.Error("Expected error for Feb 29th in non-leap year")}})}func TestUTCConversionCorrectness(t *testing.T) {t.Run("should convert Pacific/Auckland night recordings correctly to UTC", func(t *testing.T) {// 21:00 Pacific/Auckland (May = UTC+12) → 09:00 UTC same dayresults := parseAndApply(t, []string{"20210505_210000.wav"}, "Pacific/Auckland")assertTimestamp(t, results[0].UTC(), expectedTS{2021, 5, 5, 9, 0, 0})})t.Run("should convert day recordings correctly to UTC", func(t *testing.T) {// 12:00 Pacific/Auckland (May = UTC+12) → 00:00 UTC same dayresults := parseAndApply(t, []string{"20210505_120000.wav"}, "Pacific/Auckland")assertTimestamp(t, results[0].UTC(), expectedTS{2021, 5, 5, 0, 0, 0})})t.Run("should handle date rollover correctly", func(t *testing.T) {// 02:00 Pacific/Auckland (May = UTC+12) → 14:00 UTC previous dayresults := parseAndApply(t, []string{"20210505_020000.wav"}, "Pacific/Auckland")assertTimestamp(t, results[0].UTC(), expectedTS{2021, 5, 4, 14, 0, 0})})t.Run("should convert correctly for negative offset timezone", func(t *testing.T) {// 15:00 New York (June = UTC-4 during DST) → 19:00 UTC same dayresults := parseAndApply(t, []string{"20210615_150000.wav"}, "America/New_York")assertTimestamp(t, results[0].UTC(), expectedTS{2021, 6, 15, 19, 0, 0})})}
package wav

import (
	"fmt"
	"path/filepath"
	"regexp"
	"strconv"
	"time"
)

// DateFormat represents the detected filename date format.
type DateFormat int

// Date format constants for filename timestamp parsing.
const (
	Format8Digit  DateFormat = iota // YYYYMMDD_HHMMSS (e.g., 20230609_103000.wav)
	Format6YYMMDD                   // YYMMDD_HHMMSS (e.g., 201012_123456.wav) - year first
	Format6DDMMYY                   // DDMMYY_HHMMSS (e.g., 121020_123456.wav) - year last
)

// timestampPattern matches timestamp filenames.
//
// Supports: YYYYMMDD_HHMMSS, YYMMDD_HHMMSS, DDMMYY_HHMMSS.
// Case-insensitive for the file extension (.wav, .WAV, .Wav).
// Allows prefixes before the timestamp pattern.
// Allows optional suffixes between timestamp and extension (e.g., _16kHz).
//
// The date group accepts 6 to 8 digits, so a 7-digit run still matches here;
// it is rejected later by the length checks in parseFilenameWithFormat.
var timestampPattern = regexp.MustCompile(`(?i)(\d{6,8})_(\d{6})(?:_[^/\\]*)?\.wav$`)

// dateParts represents parsed date components for format detection.
type dateParts struct {
	x1 int // First 2 digits
	m  int // Middle 2 digits (always month)
	x2 int // Last 2 digits
}

// FilenameTimestamp represents a parsed timestamp from a filename.
type FilenameTimestamp struct {
	Filename  string
	Timestamp time.Time
	Format    DateFormat
}

// ParseFilenameTimestamps extracts timestamps from a batch of filenames.
//
// The date format is detected once for the whole batch; 6-digit dates are
// disambiguated (YYMMDD vs DDMMYY) by comparing how many distinct values the
// first and last digit pairs take. Results are returned in input order with
// the wall-clock value placed in UTC; the timezone must be applied separately
// (see ApplyTimezoneOffset).
//
// An error is returned for an empty batch, when no format can be detected,
// or when any single filename fails to parse or carries an invalid date or
// time of day.
func ParseFilenameTimestamps(filenames []string) ([]FilenameTimestamp, error) {
	if len(filenames) == 0 {
		return nil, fmt.Errorf("no filenames provided")
	}

	// Detect date format by analyzing all filenames.
	format, err := detectDateFormat(filenames)
	if err != nil {
		return nil, err
	}

	// Parse all filenames using the detected format.
	results := make([]FilenameTimestamp, 0, len(filenames))
	for _, filename := range filenames {
		timestamp, err := parseFilenameWithFormat(filename, format)
		if err != nil {
			return nil, fmt.Errorf("failed to parse %s: %w", filename, err)
		}
		results = append(results, FilenameTimestamp{
			Filename:  filename,
			Timestamp: timestamp,
			Format:    format,
		})
	}
	return results, nil
}

// ApplyTimezoneOffset re-interprets the wall-clock value of every timestamp in
// a single fixed UTC offset and returns the results in input order.
//
// The offset is the one that timezoneID has at the EARLIEST (chronologically)
// timestamp; it is then applied unchanged to all timestamps, so a DST
// transition inside the batch does not shift later files. The original
// author's note says this matches AudioMoth behavior (no DST adjustment
// during deployment).
//
// An error is returned for an empty input or an unknown timezone ID.
func ApplyTimezoneOffset(timestamps []FilenameTimestamp, timezoneID string) ([]time.Time, error) {
	if len(timestamps) == 0 {
		return nil, fmt.Errorf("no timestamps provided")
	}

	loc, err := time.LoadLocation(timezoneID)
	if err != nil {
		return nil, fmt.Errorf("invalid timezone %s: %w", timezoneID, err)
	}

	// Find the chronologically earliest timestamp.
	earliest := timestamps[0].Timestamp
	for _, ts := range timestamps[1:] {
		if ts.Timestamp.Before(earliest) {
			earliest = ts.Timestamp
		}
	}

	// Freeze the offset in force at the earliest wall-clock time.
	_, offsetSeconds := wallClockIn(earliest, loc).Zone()
	fixedOffset := time.FixedZone("Fixed", offsetSeconds)

	// Apply the SAME offset to ALL timestamps (maintaining original order).
	results := make([]time.Time, len(timestamps))
	for i, ts := range timestamps {
		results[i] = wallClockIn(ts.Timestamp, fixedOffset)
	}
	return results, nil
}

// wallClockIn returns the instant whose date and time-of-day fields in loc
// equal the fields t shows in its own location. Sub-second precision is dropped.
func wallClockIn(t time.Time, loc *time.Location) time.Time {
	return time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), 0, loc)
}

// detectDateFormat analyzes filenames to determine the date format shared by
// the batch. Filenames that do not match timestampPattern, or whose date group
// is neither 6 nor 8 digits long, are ignored here.
func detectDateFormat(filenames []string) (DateFormat, error) {
	var parts []dateParts
	has8Digit := false

	for _, filename := range filenames {
		matches := timestampPattern.FindStringSubmatch(filepath.Base(filename))
		if matches == nil {
			continue
		}
		switch dateStr := matches[1]; len(dateStr) {
		case 8: // YYYYMMDD
			has8Digit = true
		case 6: // YYMMDD or DDMMYY, decided below
			x1, m, x2, err := parseTriple(dateStr, 2, 2)
			if err != nil {
				return 0, fmt.Errorf("failed to parse %s: %w", filename, err)
			}
			parts = append(parts, dateParts{x1: x1, m: m, x2: x2})
		}
	}

	switch {
	case has8Digit && len(parts) == 0:
		// All recognized files are 8-digit.
		return Format8Digit, nil
	case has8Digit:
		return 0, fmt.Errorf("mixed date formats detected (8-digit and 6-digit)")
	case len(parts) == 0:
		return 0, fmt.Errorf("no valid timestamp filenames found")
	case len(parts) == 1:
		// A single 6-digit date cannot be disambiguated.
		return 0, fmt.Errorf("need at least 2 files to disambiguate 6-digit date format (YYMMDD vs DDMMYY)")
	}

	// Variance-based disambiguation: day values vary more than year values
	// across recordings, so the digit pair with more distinct values is taken
	// to be the day. Ties resolve to YYMMDD.
	uniqueX1 := countUnique(parts, func(p dateParts) int { return p.x1 })
	uniqueX2 := countUnique(parts, func(p dateParts) int { return p.x2 })
	if uniqueX2 >= uniqueX1 {
		return Format6YYMMDD, nil
	}
	return Format6DDMMYY, nil
}

// parseFilenameWithFormat parses a filename using the specified format and
// returns the wall-clock timestamp placed in UTC.
//
// It returns an error when the filename does not match timestampPattern, when
// the date group has the wrong length for format, when format is not one of
// the known constants, or when the date or the time of day is out of range.
func parseFilenameWithFormat(filename string, format DateFormat) (time.Time, error) {
	basename := filepath.Base(filename)
	matches := timestampPattern.FindStringSubmatch(basename)
	if matches == nil {
		return time.Time{}, fmt.Errorf("filename does not match timestamp pattern: %s", basename)
	}
	dateStr, timeStr := matches[1], matches[2]

	var (
		year, month, day int
		err              error
	)
	switch format {
	case Format8Digit:
		if len(dateStr) != 8 {
			return time.Time{}, fmt.Errorf("expected 8-digit date, got %d digits", len(dateStr))
		}
		year, month, day, err = parseTriple(dateStr, 4, 2)
	case Format6YYMMDD:
		if len(dateStr) != 6 {
			return time.Time{}, fmt.Errorf("expected 6-digit date, got %d digits", len(dateStr))
		}
		year, month, day, err = parseTriple(dateStr, 2, 2)
		year += 2000 // 2-digit year: assume 2000-2099
	case Format6DDMMYY:
		if len(dateStr) != 6 {
			return time.Time{}, fmt.Errorf("expected 6-digit date, got %d digits", len(dateStr))
		}
		day, month, year, err = parseTriple(dateStr, 2, 2)
		year += 2000 // 2-digit year: assume 2000-2099
	default:
		return time.Time{}, fmt.Errorf("unsupported date format: %d", format)
	}
	if err != nil {
		return time.Time{}, err
	}

	// Parse time (HHMMSS).
	if len(timeStr) != 6 {
		return time.Time{}, fmt.Errorf("invalid time format: %s", timeStr)
	}
	hour, minute, second, err := parseTriple(timeStr, 2, 2)
	if err != nil {
		return time.Time{}, err
	}
	// time.Date silently normalizes out-of-range fields (e.g. minute 61 becomes
	// the next hour), so the time of day has to be range-checked explicitly.
	if hour > 23 || minute > 59 || second > 59 {
		return time.Time{}, fmt.Errorf("invalid time: %02d:%02d:%02d", hour, minute, second)
	}

	// Construct timestamp in UTC (timezone applied separately).
	timestamp := time.Date(year, time.Month(month), day, hour, minute, second, 0, time.UTC)

	// A date that time.Date had to normalize (day 32, month 13, Feb 29 in a
	// non-leap year, ...) no longer round-trips and is rejected.
	if timestamp.Month() != time.Month(month) || timestamp.Day() != day {
		return time.Time{}, fmt.Errorf("invalid date: %04d-%02d-%02d", year, month, day)
	}
	return timestamp, nil
}

// parseTriple splits s into three decimal fields: the first w1 bytes, the next
// w2 bytes, and the remainder. It returns an error if s is too short to hold
// three non-empty fields or if any field is not a valid integer.
func parseTriple(s string, w1, w2 int) (int, int, int, error) {
	if len(s) <= w1+w2 {
		return 0, 0, 0, fmt.Errorf("numeric field %q is too short", s)
	}
	var out [3]int
	for i, part := range [3]string{s[:w1], s[w1 : w1+w2], s[w1+w2:]} {
		n, err := strconv.Atoi(part)
		if err != nil {
			return 0, 0, 0, fmt.Errorf("invalid numeric field %q: %w", part, err)
		}
		out[i] = n
	}
	return out[0], out[1], out[2], nil
}

// countUnique counts the distinct values produced by extractor over parts.
func countUnique(parts []dateParts, extractor func(p dateParts) int) int {
	seen := make(map[int]struct{}, len(parts))
	for _, p := range parts {
		seen[extractor(p)] = struct{}{}
	}
	return len(seen)
}

// HasTimestampFilename reports whether the base name of filename matches the
// timestamp pattern.
func HasTimestampFilename(filename string) bool {
	return timestampPattern.MatchString(filepath.Base(filename))
}
} else if utils.HasTimestampFilename(filePath) {filenameTimestamps, err := utils.ParseFilenameTimestamps([]string{filepath.Base(filePath)})
} else if HasTimestampFilename(filePath) {filenameTimestamps, err := ParseFilenameTimestamps([]string{filepath.Base(filePath)})
package utils

import "strings"

// Placeholders generates the SQL placeholder string for an IN clause with n
// parameters (e.g. "?, ?, ?" for n == 3).
//
// It returns the empty string when n is zero or negative; a negative count
// previously panicked while allocating the intermediate slice.
func Placeholders(n int) string {
	if n <= 0 {
		return ""
	}
	// n-1 separated markers followed by the final, unseparated one.
	return strings.Repeat("?, ", n-1) + "?"
}
package utils

import (
	"strconv"
	"testing"
)

// TestPlaceholders checks the placeholder string produced for several counts.
// Subtests are named after the decimal count, which stays readable for counts
// of 10 and above.
func TestPlaceholders(t *testing.T) {
	tests := []struct {
		n    int
		want string
	}{
		{0, ""},
		{1, "?"},
		{3, "?, ?, ?"},
		{5, "?, ?, ?, ?, ?"},
		{10, "?, ?, ?, ?, ?, ?, ?, ?, ?, ?"},
	}
	for _, tt := range tests {
		t.Run(strconv.Itoa(tt.n), func(t *testing.T) {
			if got := Placeholders(tt.n); got != tt.want {
				t.Errorf("Placeholders(%d) = %q, want %q", tt.n, got, tt.want)
			}
		})
	}
}
package utils

import (
	"os"
	"path/filepath"
	"sort"
	"testing"
)

// writeFindDataFixture creates dir/name with the given content and fails the
// test immediately if the file cannot be written.
func writeFindDataFixture(t *testing.T, dir, name, content string) {
	t.Helper()
	if err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0644); err != nil {
		t.Fatal(err)
	}
}

func TestFindDataFiles_Basic(t *testing.T) {
	dir := t.TempDir()
	names := []string{"a.data", "b.data", "c.data"}

	// Create some .data files
	for _, name := range names {
		writeFindDataFixture(t, dir, name, "[]")
	}
	// Create a non-.data file that should be ignored
	writeFindDataFixture(t, dir, "notes.txt", "ignore")

	files, err := FindDataFiles(dir)
	if err != nil {
		t.Fatal(err)
	}
	// Sort before comparing so the test does not depend on the returned order.
	sort.Strings(files)

	if len(files) != len(names) {
		t.Fatalf("expected 3 files, got %d: %v", len(files), files)
	}
	for i, base := range names {
		if expected := filepath.Join(dir, base); files[i] != expected {
			t.Errorf("file %d: got %q, want %q", i, files[i], expected)
		}
	}
}

func TestFindDataFiles_SkipsHidden(t *testing.T) {
	dir := t.TempDir()

	// Regular .data file
	writeFindDataFixture(t, dir, "visible.data", "[]")
	// Hidden .data file (should be skipped)
	writeFindDataFixture(t, dir, ".hidden.data", "[]")

	files, err := FindDataFiles(dir)
	if err != nil {
		t.Fatal(err)
	}
	if len(files) != 1 {
		t.Fatalf("expected 1 file (hidden skipped), got %d: %v", len(files), files)
	}
	if filepath.Base(files[0]) != "visible.data" {
		t.Errorf("got %q, want visible.data", files[0])
	}
}

func TestFindDataFiles_NonRecursive(t *testing.T) {
	dir := t.TempDir()

	// .data file in root
	writeFindDataFixture(t, dir, "root.data", "[]")

	// .data file in subdirectory (should NOT be found)
	sub := filepath.Join(dir, "subdir")
	if err := os.Mkdir(sub, 0755); err != nil {
		t.Fatal(err)
	}
	writeFindDataFixture(t, sub, "nested.data", "[]")

	files, err := FindDataFiles(dir)
	if err != nil {
		t.Fatal(err)
	}
	if len(files) != 1 {
		t.Fatalf("expected 1 file (non-recursive), got %d: %v", len(files), files)
	}
	if filepath.Base(files[0]) != "root.data" {
		t.Errorf("got %q, want root.data", files[0])
	}
}

func TestFindDataFiles_EmptyDir(t *testing.T) {
	files, err := FindDataFiles(t.TempDir())
	if err != nil {
		t.Fatal(err)
	}
	if len(files) != 0 {
		t.Errorf("expected 0 files, got %d", len(files))
	}
}

func TestFindDataFiles_NonexistentDir(t *testing.T) {
	// A child of a fresh temp dir is guaranteed not to exist, unlike a
	// hard-coded absolute path that only happens to be absent.
	missing := filepath.Join(t.TempDir(), "does-not-exist")
	if _, err := FindDataFiles(missing); err == nil {
		t.Error("expected error for nonexistent directory")
	}
}

func TestFindDataFiles_NoDataFiles(t *testing.T) {
	dir := t.TempDir()
	writeFindDataFixture(t, dir, "readme.txt", "hello")

	files, err := FindDataFiles(dir)
	if err != nil {
		t.Fatal(err)
	}
	if len(files) != 0 {
		t.Errorf("expected 0 files, got %d", len(files))
	}
}
package utils

import (
	"os"
	"path/filepath"
	"testing"
)

// TestLoadConfig writes a config.json under a temporary HOME/.skraak directory
// and checks that LoadConfig returns the classify settings from it together
// with a non-empty path.
func TestLoadConfig(t *testing.T) {
	// Point HOME at a scratch directory so the test never touches a real config.
	home := t.TempDir()
	t.Setenv("HOME", home)

	skraakDir := filepath.Join(home, ".skraak")
	if mkErr := os.MkdirAll(skraakDir, 0755); mkErr != nil {
		t.Fatalf("failed to create config dir: %v", mkErr)
	}

	const jsonContent = `{
		"classify": {
			"reviewer": "Test Reviewer",
			"color": true
		}
	}`
	configFile := filepath.Join(skraakDir, "config.json")
	if writeErr := os.WriteFile(configFile, []byte(jsonContent), 0644); writeErr != nil {
		t.Fatalf("failed to write config: %v", writeErr)
	}

	cfg, path, err := LoadConfig()
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if reviewer := cfg.Classify.Reviewer; reviewer != "Test Reviewer" {
		t.Errorf("expected Test Reviewer, got %s", reviewer)
	}
	if !cfg.Classify.Color {
		t.Error("expected color to be true")
	}
	if path == "" {
		t.Error("expected path to be returned")
	}
}
package utilsimport ("testing")type expectedTS struct {Year, Month, Day, Hour, Minute, Second int}func assertTimestamp(t *testing.T, got time.Time, want expectedTS) {t.Helper()t.Helper()if got.Year() != want.Year {t.Errorf("Year: got %d, want %d", got.Year(), want.Year)}if got.Month() != time.Month(want.Month) {t.Errorf("Month: got %d, want %d", got.Month(), want.Month)}if got.Day() != want.Day {t.Errorf("Day: got %d, want %d", got.Day(), want.Day)}if got.Hour() != want.Hour {t.Errorf("Hour: got %d, want %d", got.Hour(), want.Hour)}if got.Minute() != want.Minute {t.Errorf("Minute: got %d, want %d", got.Minute(), want.Minute)}if got.Second() != want.Second {t.Errorf("Second: got %d, want %d", got.Second(), want.Second)}}func assertOffset(t *testing.T, got time.Time, wantSeconds int) {t.Helper()_, offset := got.Zone()if offset != wantSeconds {t.Errorf("Offset: got %d seconds, want %d seconds", offset, wantSeconds)}}// parseAndApply is a test helper that parses filenames and applies a timezone offset.func parseAndApply(t *testing.T, filenames []string, tz string) []time.Time {t.Helper()parsed, err := ParseFilenameTimestamps(filenames)if err != nil {t.Fatalf("Failed to parse filenames: %v", err)}results, err := ApplyTimezoneOffset(parsed, tz)if err != nil {t.Fatalf("Failed to apply timezone: %v", err)}return results}}func TestParseFilenameTimestampsErrors(t *testing.T) {t.Run("should throw error for empty filename array", func(t *testing.T) {_, err := ParseFilenameTimestamps([]string{})if err == nil {t.Error("Expected error for empty filename array")}if err != nil && err.Error() != "no filenames provided" {t.Logf("Error message: %v", err)}})t.Run("should throw error for filenames without date patterns", func(t *testing.T) {_, err := ParseFilenameTimestamps([]string{"invalid_filename.wav"})if err == nil {t.Error("Expected error for filenames without date patterns")}})t.Run("should throw error for mixed date formats", func(t *testing.T) {mixedFormats := 
[]string{"201012_123456.wav", "20231012_123456.wav"} // 6-digit vs 8-digit_, err := ParseFilenameTimestamps(mixedFormats)if err == nil {t.Error("Expected error for mixed date formats")}})t.Run("should throw error for wrong length patterns", func(t *testing.T) {wrongLength := []string{"2010_123456.wav"} // 4 digits instead of 6 or 8_, err := ParseFilenameTimestamps(wrongLength)if err == nil {t.Error("Expected error for wrong length patterns")}})t.Run("should throw error when not enough files for 6-digit disambiguation", func(t *testing.T) {singleFile := []string{"120119_003002.wav"}_, err := ParseFilenameTimestamps(singleFile)if err == nil {t.Error("Expected error when not enough files for 6-digit disambiguation")}})}func TestApplyTimezoneOffset(t *testing.T) {t.Run("should apply UTC timezone correctly", func(t *testing.T) {results := parseAndApply(t, []string{"201012_123456.wav", "201014_123456.WAV"}, "UTC")if len(results) != 2 {t.Fatalf("Expected 2 results, got %d", len(results))}assertOffset(t, results[0], 0)})t.Run("should use fixed offset for entire cluster spanning DST transition", func(t *testing.T) {// Auckland DST ended April 4, 2021 (UTC+13 -> UTC+12)results := parseAndApply(t, []string{"20210401_120000.wav", // April 1st - DST active (UTC+13)"20210410_120000.wav", // April 10th - DST ended (would be UTC+12 if DST applied)"20210420_120000.wav", // April 20th - Standard time}, "Pacific/Auckland")if len(results) != 3 {t.Fatalf("Expected 3 results, got %d", len(results))}// All files should use UTC+13 offset (from earliest file: April 1st)for _, r := range results {assertOffset(t, r, 13*3600)}// All at 12:00 local - 13h = 23:00 UTC previous dayassertTimestamp(t, results[0].UTC(), expectedTS{2021, 3, 31, 23, 0, 0})assertTimestamp(t, results[1].UTC(), expectedTS{2021, 4, 9, 23, 0, 0})assertTimestamp(t, results[2].UTC(), expectedTS{2021, 4, 19, 23, 0, 0})})t.Run("should handle out-of-order filenames correctly", func(t *testing.T) {results := parseAndApply(t, 
[]string{"20210410_120000.wav", // April 10th (later)"20210401_120000.wav", // April 1st (earliest - determines offset)"20210405_120000.wav", // April 5th (middle)}, "Pacific/Auckland")// All files use UTC+13 (from April 1st, the earliest)for _, r := range results {assertOffset(t, r, 13*3600)}// Results maintain original filename orderassertTimestamp(t, results[0], expectedTS{2021, 4, 10, 12, 0, 0})assertTimestamp(t, results[1], expectedTS{2021, 4, 1, 12, 0, 0})assertTimestamp(t, results[2], expectedTS{2021, 4, 5, 12, 0, 0})})t.Run("should apply fixed offset consistently across large time spans", func(t *testing.T) {results := parseAndApply(t, []string{"20210215_120000.wav", // February (summer, UTC+13)"20210615_120000.wav", // June (winter, would be UTC+12 if DST applied)"20210815_120000.wav", // August (winter)}, "Pacific/Auckland")// All files use offset from earliest (February): UTC+13for _, r := range results {assertOffset(t, r, 13*3600)}// 12:00 local - 13h = 23:00 UTC previous dayassertTimestamp(t, results[0].UTC(), expectedTS{2021, 2, 14, 23, 0, 0})assertTimestamp(t, results[1].UTC(), expectedTS{2021, 6, 14, 23, 0, 0})assertTimestamp(t, results[2].UTC(), expectedTS{2021, 8, 14, 23, 0, 0})})t.Run("should handle US DST transitions with fixed offset", func(t *testing.T) {results := parseAndApply(t, []string{"20210310_120000.wav", // March 10th - before DST (UTC-5)"20210320_120000.wav", // March 20th - after DST (would be UTC-4)}, "America/New_York")// All files use offset from earliest (March 10th): UTC-5for _, r := range results {assertOffset(t, r, -5*3600)}// 12:00 local + 5h = 17:00 UTCassertTimestamp(t, results[0].UTC(), expectedTS{2021, 3, 10, 17, 0, 0})assertTimestamp(t, results[1].UTC(), expectedTS{2021, 3, 20, 17, 0, 0})})t.Run("should handle empty timestamps array", func(t *testing.T) {_, err := ApplyTimezoneOffset([]FilenameTimestamp{}, "UTC")if err == nil {t.Error("Expected error for empty timestamps array")}})t.Run("should handle invalid timezone", 
func(t *testing.T) {filenames := []string{"20210401_120000.wav"}parsed, err := ParseFilenameTimestamps(filenames)if err != nil {t.Fatalf("Failed to parse filenames: %v", err)}_, err = ApplyTimezoneOffset(parsed, "Invalid/Timezone")if err == nil {t.Error("Expected error for invalid timezone")}})}func TestHasTimestampFilename(t *testing.T) {testCases := []struct {filename stringexpected bool}{{"201012_123456.wav", true},{"20230609_103000.WAV", true},{"invalid_filename.wav", false},{"201012_123456.txt", false},{"201012.wav", false},{"_123456.wav", false},{"", false},}for _, tc := range testCases {t.Run(tc.filename, func(t *testing.T) {result := HasTimestampFilename(tc.filename)if result != tc.expected {t.Errorf("HasTimestampFilename(%q) = %v, want %v", tc.filename, result, tc.expected)}})}}func TestFilenameParserEdgeCases(t *testing.T) {t.Run("should handle case-insensitive file extensions", func(t *testing.T) {filenames := []string{"201012_123456.wav","201014_123456.WAV","201217_123456.Wav",}results, err := ParseFilenameTimestamps(filenames)if err != nil {t.Fatalf("Failed to parse filenames: %v", err)}if len(results) != 3 {t.Errorf("Expected 3 results, got %d", len(results))}})t.Run("should validate invalid dates", func(t *testing.T) {// 32nd day doesn't exist - should be caught by validationfilenames := []string{"20240132_120000.wav"}_, err := ParseFilenameTimestamps(filenames)if err == nil {t.Error("Expected error for invalid date (day 32)")}})t.Run("should validate invalid months", func(t *testing.T) {// 13th month doesn't existfilenames := []string{"20241301_120000.wav"}_, err := ParseFilenameTimestamps(filenames)if err == nil {t.Error("Expected error for invalid month (13)")}})t.Run("should handle February 29th in leap year", func(t *testing.T) {filenames := []string{"20240229_120000.wav"} // 2024 is a leap yearresults, err := ParseFilenameTimestamps(filenames)if err != nil {t.Fatalf("Failed to parse leap year date: %v", err)}if results[0].Timestamp.Day() != 29 
{t.Errorf("Expected day 29, got %d", results[0].Timestamp.Day())}})t.Run("should reject February 29th in non-leap year", func(t *testing.T) {filenames := []string{"20230229_120000.wav"} // 2023 is not a leap year_, err := ParseFilenameTimestamps(filenames)if err == nil {t.Error("Expected error for Feb 29th in non-leap year")}})}func TestUTCConversionCorrectness(t *testing.T) {t.Run("should convert Pacific/Auckland night recordings correctly to UTC", func(t *testing.T) {// 21:00 Pacific/Auckland (May = UTC+12) → 09:00 UTC same dayresults := parseAndApply(t, []string{"20210505_210000.wav"}, "Pacific/Auckland")assertTimestamp(t, results[0].UTC(), expectedTS{2021, 5, 5, 9, 0, 0})})t.Run("should convert day recordings correctly to UTC", func(t *testing.T) {// 12:00 Pacific/Auckland (May = UTC+12) → 00:00 UTC same dayresults := parseAndApply(t, []string{"20210505_120000.wav"}, "Pacific/Auckland")assertTimestamp(t, results[0].UTC(), expectedTS{2021, 5, 5, 0, 0, 0})})t.Run("should handle date rollover correctly", func(t *testing.T) {// 02:00 Pacific/Auckland (May = UTC+12) → 14:00 UTC previous dayresults := parseAndApply(t, []string{"20210505_020000.wav"}, "Pacific/Auckland")assertTimestamp(t, results[0].UTC(), expectedTS{2021, 5, 4, 14, 0, 0})})t.Run("should convert correctly for negative offset timezone", func(t *testing.T) {// 15:00 New York (June = UTC-4 during DST) → 19:00 UTC same dayresults := parseAndApply(t, []string{"20210615_150000.wav"}, "America/New_York")assertTimestamp(t, results[0].UTC(), expectedTS{2021, 6, 15, 19, 0, 0})})}for _, tc := range cases {t.Run(tc.name, func(t *testing.T) {runParseTestCase(t, tc)})}func TestParseFilenameTimestamps(t *testing.T) {cases := []parseTestCase{{name: "YYMMDD format (test case a)",files: []string{"201012_123456.wav", "201014_123456.WAV", "201217_123456.wav", "211122_123456.WAV"},expected: map[int]expectedTS{0: {2020, 10, 12, 12, 34, 56}, // Year 20 → 20203: {2021, 11, 22, 12, 34, 56},},},{name: "DDMMYY format (test case 
b)",files: []string{"121020_123456.WAV", "141020_123456.wav", "171220_123456.WAV", "221121_123456.wav"},expected: map[int]expectedTS{0: {2020, 10, 12, 12, 34, 56},2: {2020, 12, 17, 12, 34, 56},},},{name: "YYYYMMDD format (test case c)",files: []string{"20230609_103000.WAV", "20241109_201504.wav"},expected: map[int]expectedTS{0: {2023, 6, 9, 10, 30, 0},1: {2024, 11, 9, 20, 15, 4},},},{name: "6-digit with variance detection (test case d)",files: []string{"120119_003002.wav", "180120_231502.wav", "170122_010005.wav", "010419_234502.WAV", "310320_231502.wav", "220824_231502.WAV", "240123_231502.wav"},expected: map[int]expectedTS{0: {2019, 1, 12, 0, 30, 2}, // DDMMYY4: {2020, 3, 31, 23, 15, 2},},},{name: "prefixes (test case e)",files: []string{"XYZ123_7689_20230609_103000.WAV", "string 20241109_201504.wav"},expected: map[int]expectedTS{0: {2023, 6, 9, 10, 30, 0},1: {2024, 11, 9, 20, 15, 4},},},{name: "complex prefixes (test case f)",files: []string{"abcdefg__1234_180120_231502.wav", "string 120119_003002.wav", "ABCD EFG___170122_010005.wav", "BHD_1234 010419_234502.WAV", "cill xyz 310320_231502.wav", "220824_231502.WAV", "240123_231502.wav"},expected: map[int]expectedTS{0: {2020, 1, 18, 23, 15, 2},1: {2019, 1, 12, 0, 30, 2},4: {2020, 3, 31, 23, 15, 2},},},}func runParseTestCase(t *testing.T, tc parseTestCase) {t.Helper()results, err := ParseFilenameTimestamps(tc.files)if err != nil {t.Fatalf("Failed to parse filenames: %v", err)}if len(results) != len(tc.files) {t.Fatalf("Expected %d results, got %d", len(tc.files), len(results))}for idx, want := range tc.expected {assertTimestamp(t, results[idx].Timestamp, want)}}// parseTestCase defines a table-driven test case for ParseFilenameTimestamps.type parseTestCase struct {name stringfiles []stringexpected map[int]expectedTS // index → expected timestamp}"time"
package utils

import (
	"fmt"
	"path/filepath"
	"regexp"
	"strconv"
	"time"
)

// DateFormat represents the detected filename date format.
type DateFormat int

// Date format constants for filename timestamp parsing.
const (
	Format8Digit  DateFormat = iota // YYYYMMDD_HHMMSS (e.g., 20230609_103000.wav)
	Format6YYMMDD                   // YYMMDD_HHMMSS (e.g., 201012_123456.wav) - year first
	Format6DDMMYY                   // DDMMYY_HHMMSS (e.g., 121020_123456.wav) - year last
)

var (
	// timestampPattern matches timestamp filenames.
	//   - Supports YYYYMMDD_HHMMSS, YYMMDD_HHMMSS and DDMMYY_HHMMSS.
	//   - Case-insensitive, so .wav, .WAV and .Wav are all accepted.
	//   - Not anchored on the left, so prefixes before the timestamp are allowed.
	//   - Allows an optional suffix between timestamp and extension (e.g., _16kHz).
	//
	// Capture group 1 holds the date digits (6 to 8 of them), group 2 the six
	// time digits.
	timestampPattern = regexp.MustCompile(`(?i)(\d{6,8})_(\d{6})(?:_[^/\\]*)?\.wav$`)
)

// dateParts holds the three 2-digit groups of a 6-digit date, used for
// format detection.
type dateParts struct {
	x1 int // First 2 digits (year or day, depending on format)
	m  int // Middle 2 digits (always month)
	x2 int // Last 2 digits (day or year, depending on format)
}

// FilenameTimestamp represents a parsed timestamp from a filename.
type FilenameTimestamp struct {
	Filename  string
	Timestamp time.Time
	Format    DateFormat
}

// ParseFilenameTimestamps extracts timestamps from a batch of filenames.
//
// The date format is detected once for the whole batch (see detectDateFormat),
// using variance-based disambiguation for 6-digit dates (YYMMDD vs DDMMYY),
// and then applied to every filename. Results are returned in input order with
// the timestamp constructed in UTC; the real timezone must be applied
// separately with ApplyTimezoneOffset.
//
// An error is returned when filenames is empty, when the format cannot be
// determined, or when any single filename fails to parse.
func ParseFilenameTimestamps(filenames []string) ([]FilenameTimestamp, error) {
	if len(filenames) == 0 {
		return nil, fmt.Errorf("no filenames provided")
	}

	// Detect date format by analyzing all filenames.
	format, err := detectDateFormat(filenames)
	if err != nil {
		return nil, err
	}

	// Parse all filenames using the detected format.
	results := make([]FilenameTimestamp, 0, len(filenames))
	for _, filename := range filenames {
		timestamp, err := parseFilenameWithFormat(filename, format)
		if err != nil {
			return nil, fmt.Errorf("failed to parse %s: %w", filename, err)
		}
		results = append(results, FilenameTimestamp{
			Filename:  filename,
			Timestamp: timestamp,
			Format:    format,
		})
	}
	return results, nil
}

// ApplyTimezoneOffset reinterprets the wall-clock values of timestamps in the
// timezone named by timezoneID.
//
// The UTC offset is taken from the chronologically EARLIEST timestamp as
// observed in that timezone and then applied, unchanged, to every timestamp.
// This matches AudioMoth behavior (no DST adjustment during deployment).
// Results keep the order of the input slice.
//
// An error is returned when timestamps is empty or timezoneID cannot be loaded.
func ApplyTimezoneOffset(timestamps []FilenameTimestamp, timezoneID string) ([]time.Time, error) {
	if len(timestamps) == 0 {
		return nil, fmt.Errorf("no timestamps provided")
	}

	loc, err := time.LoadLocation(timezoneID)
	if err != nil {
		return nil, fmt.Errorf("invalid timezone %s: %w", timezoneID, err)
	}

	// Find the chronologically earliest timestamp.
	earliest := timestamps[0].Timestamp
	for _, ts := range timestamps[1:] {
		if ts.Timestamp.Before(earliest) {
			earliest = ts.Timestamp
		}
	}

	// The offset in force at the earliest recording becomes a fixed zone, so it
	// does not change if a DST transition falls inside the batch.
	_, offsetSeconds := reinterpretWallClock(earliest, loc).Zone()
	fixedOffset := time.FixedZone("Fixed", offsetSeconds)

	// Apply the SAME offset to ALL timestamps (maintaining original order).
	results := make([]time.Time, len(timestamps))
	for i, ts := range timestamps {
		results[i] = reinterpretWallClock(ts.Timestamp, fixedOffset)
	}
	return results, nil
}

// reinterpretWallClock returns the instant whose wall-clock reading in loc
// equals the wall-clock reading of t (year through second; sub-second
// precision is dropped).
func reinterpretWallClock(t time.Time, loc *time.Location) time.Time {
	return time.Date(
		t.Year(), t.Month(), t.Day(),
		t.Hour(), t.Minute(), t.Second(),
		0, loc,
	)
}

// timestampDigits converts a substring captured by timestampPattern to an int.
// The pattern only captures ASCII digits, so the conversion cannot fail and
// the error is deliberately discarded.
func timestampDigits(s string) int {
	n, _ := strconv.Atoi(s)
	return n
}

// detectDateFormat analyzes filenames to determine the date format.
//
// Filenames that do not match timestampPattern are ignored here. A batch of
// only 8-digit dates yields Format8Digit; mixing 8-digit and 6-digit dates is
// an error. For 6-digit dates at least two matching files are required, and
// the side (first or last two digits) with more distinct values is taken to be
// the day, on the assumption that days vary more than years across recordings.
func detectDateFormat(filenames []string) (DateFormat, error) {
	parts := make([]dateParts, 0, len(filenames))
	has8Digit := false

	for _, filename := range filenames {
		matches := timestampPattern.FindStringSubmatch(filepath.Base(filename))
		if matches == nil {
			continue
		}

		// Note: a 7-digit capture is neither case and is skipped here.
		switch dateStr := matches[1]; len(dateStr) {
		case 8:
			has8Digit = true
		case 6:
			parts = append(parts, dateParts{
				x1: timestampDigits(dateStr[0:2]),
				m:  timestampDigits(dateStr[2:4]),
				x2: timestampDigits(dateStr[4:6]),
			})
		}
	}

	switch {
	case has8Digit && len(parts) == 0:
		// All matching files are 8-digit.
		return Format8Digit, nil
	case has8Digit:
		return 0, fmt.Errorf("mixed date formats detected (8-digit and 6-digit)")
	case len(parts) == 0:
		return 0, fmt.Errorf("no valid timestamp filenames found")
	case len(parts) == 1:
		return 0, fmt.Errorf("need at least 2 files to disambiguate 6-digit date format (YYMMDD vs DDMMYY)")
	}

	// Variance-based disambiguation: compare the number of distinct values of
	// the first two digits against the last two digits.
	uniqueX1 := countUnique(parts, func(p dateParts) int { return p.x1 })
	uniqueX2 := countUnique(parts, func(p dateParts) int { return p.x2 })

	if uniqueX2 >= uniqueX1 {
		// x2 varies at least as much as x1 → likely day values → YYMMDD.
		return Format6YYMMDD, nil
	}
	// x1 varies more → likely day values → DDMMYY.
	return Format6DDMMYY, nil
}

// parseFilenameWithFormat parses a single filename using the specified format
// and returns the timestamp constructed in UTC.
//
// It returns an error when the filename does not match timestampPattern, when
// the number of date digits does not fit format, or when the date or time
// components are out of range (for example day 32, Feb 29 in a non-leap year,
// or minute 60).
func parseFilenameWithFormat(filename string, format DateFormat) (time.Time, error) {
	basename := filepath.Base(filename)
	matches := timestampPattern.FindStringSubmatch(basename)
	if matches == nil {
		return time.Time{}, fmt.Errorf("filename does not match timestamp pattern: %s", basename)
	}
	dateStr, timeStr := matches[1], matches[2]

	var year, month, day int
	switch format {
	case Format8Digit:
		if len(dateStr) != 8 {
			return time.Time{}, fmt.Errorf("expected 8-digit date, got %d digits", len(dateStr))
		}
		year = timestampDigits(dateStr[0:4])
		month = timestampDigits(dateStr[4:6])
		day = timestampDigits(dateStr[6:8])
	case Format6YYMMDD:
		if len(dateStr) != 6 {
			return time.Time{}, fmt.Errorf("expected 6-digit date, got %d digits", len(dateStr))
		}
		// Convert 2-digit year to 4-digit (assume 2000-2099).
		year = 2000 + timestampDigits(dateStr[0:2])
		month = timestampDigits(dateStr[2:4])
		day = timestampDigits(dateStr[4:6])
	case Format6DDMMYY:
		if len(dateStr) != 6 {
			return time.Time{}, fmt.Errorf("expected 6-digit date, got %d digits", len(dateStr))
		}
		day = timestampDigits(dateStr[0:2])
		month = timestampDigits(dateStr[2:4])
		// Convert 2-digit year to 4-digit (assume 2000-2099).
		year = 2000 + timestampDigits(dateStr[4:6])
	default:
		return time.Time{}, fmt.Errorf("unsupported date format: %d", int(format))
	}

	// Parse time (HHMMSS). The pattern captures exactly six digits.
	hour := timestampDigits(timeStr[0:2])
	minute := timestampDigits(timeStr[2:4])
	second := timestampDigits(timeStr[4:6])

	// time.Date silently normalizes out-of-range values (12:60:00 becomes
	// 13:00:00), so the time of day has to be range-checked explicitly.
	if hour > 23 || minute > 59 || second > 59 {
		return time.Time{}, fmt.Errorf("invalid time: %02d:%02d:%02d", hour, minute, second)
	}

	// Construct timestamp in UTC (timezone applied separately).
	timestamp := time.Date(year, time.Month(month), day, hour, minute, second, 0, time.UTC)

	// A date that does not survive normalization unchanged was not a real date.
	if timestamp.Month() != time.Month(month) || timestamp.Day() != day {
		return time.Time{}, fmt.Errorf("invalid date: %04d-%02d-%02d", year, month, day)
	}
	return timestamp, nil
}

// countUnique returns the number of distinct values extractor yields over parts.
func countUnique(parts []dateParts, extractor func(p dateParts) int) int {
	seen := make(map[int]struct{}, len(parts))
	for _, p := range parts {
		seen[extractor(p)] = struct{}{}
	}
	return len(seen)
}

// HasTimestampFilename reports whether the base name of filename contains a
// timestamp pattern recognized by this package.
func HasTimestampFilename(filename string) bool {
	return timestampPattern.MatchString(filepath.Base(filename))
}
package utils

import (
	"os"
	"path/filepath"
	"testing"
)

// writeDataFixture writes content to a .data file inside a per-test temporary
// directory (removed automatically by the testing package) and returns its path.
func writeDataFixture(t *testing.T, content string) string {
	t.Helper()
	path := filepath.Join(t.TempDir(), "fixture.data")
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatal(err)
	}
	return path
}

// roundTripDataFile writes df to a temporary .data file and parses it back.
func roundTripDataFile(t *testing.T, df *DataFile) *DataFile {
	t.Helper()
	path := filepath.Join(t.TempDir(), "roundtrip.data")
	if err := df.Write(path); err != nil {
		t.Fatal(err)
	}
	parsed, err := ParseDataFile(path)
	if err != nil {
		t.Fatal(err)
	}
	return parsed
}

// TestDataFileParse checks metadata, segments and labels of a parsed .data file.
func TestDataFileParse(t *testing.T) {
	content := `[
{"Operator": "Auto", "Reviewer": null, "Duration": 60.0},
[10.0, 20.0, 0, 0, [{"species": "Kiwi", "certainty": 70, "filter": "test-filter"}]],
[30.0, 40.0, 1000, 5000, [{"species": "Morepork", "certainty": 80, "filter": "M"}]]
]`

	df, err := ParseDataFile(writeDataFixture(t, content))
	if err != nil {
		t.Fatal(err)
	}

	// Check metadata
	if df.Meta.Operator != "Auto" {
		t.Errorf("expected Operator=Auto, got %s", df.Meta.Operator)
	}
	if df.Meta.Duration != 60.0 {
		t.Errorf("expected Duration=60.0, got %f", df.Meta.Duration)
	}

	// Check segments. Fatal, because the assertions below index into the slice.
	if len(df.Segments) != 2 {
		t.Fatalf("expected 2 segments, got %d", len(df.Segments))
	}

	// Check first segment (sorted by start time)
	first := df.Segments[0]
	if first.StartTime != 10.0 {
		t.Errorf("expected StartTime=10.0, got %f", first.StartTime)
	}
	if first.EndTime != 20.0 {
		t.Errorf("expected EndTime=20.0, got %f", first.EndTime)
	}

	// Check labels
	if len(first.Labels) != 1 {
		t.Fatalf("expected 1 label, got %d", len(first.Labels))
	}
	if first.Labels[0].Species != "Kiwi" {
		t.Errorf("expected Species=Kiwi, got %s", first.Labels[0].Species)
	}
	if first.Labels[0].Certainty != 70 {
		t.Errorf("expected Certainty=70, got %d", first.Labels[0].Certainty)
	}
}

// TestDataFileWrite checks that a DataFile built in memory survives a
// write/parse cycle.
func TestDataFileWrite(t *testing.T) {
	df := &DataFile{
		FilePath: "",
		Meta: &DataMeta{
			Operator: "Test",
			Reviewer: "David",
			Duration: 120.0,
		},
		Segments: []*Segment{
			{
				StartTime: 5.0,
				EndTime:   15.0,
				FreqLow:   0,
				FreqHigh:  0,
				Labels: []*Label{
					{Species: "Kiwi", Certainty: 100, Filter: "test"},
				},
			},
		},
	}

	df2 := roundTripDataFile(t, df)

	if df2.Meta.Reviewer != "David" {
		t.Errorf("expected Reviewer=David, got %s", df2.Meta.Reviewer)
	}
	if len(df2.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df2.Segments))
	}
	if len(df2.Segments[0].Labels) != 1 {
		t.Fatalf("expected 1 label, got %d", len(df2.Segments[0].Labels))
	}
	if df2.Segments[0].Labels[0].Species != "Kiwi" {
		t.Errorf("expected Species=Kiwi, got %s", df2.Segments[0].Labels[0].Species)
	}
}

func TestHasFilterLabel(t *testing.T) {
	seg := &Segment{
		Labels: []*Label{
			{Species: "Kiwi", Filter: "test-filter"},
			{Species: "Morepork", Filter: "M"},
		},
	}

	if !seg.HasFilterLabel("test-filter") {
		t.Error("expected HasFilterLabel(test-filter)=true")
	}
	if !seg.HasFilterLabel("M") {
		t.Error("expected HasFilterLabel(M)=true")
	}
	if seg.HasFilterLabel("other") {
		t.Error("expected HasFilterLabel(other)=false")
	}
	if !seg.HasFilterLabel("") {
		t.Error("expected HasFilterLabel('')=true (no filter)")
	}
}

func TestGetFilterLabels(t *testing.T) {
	seg := &Segment{
		Labels: []*Label{
			{Species: "Kiwi", Filter: "test-filter", Certainty: 70},
			{Species: "Morepork", Filter: "M", Certainty: 80},
			{Species: "Don't Know", Filter: "test-filter", Certainty: 0},
		},
	}

	labels := seg.GetFilterLabels("test-filter")
	if len(labels) != 2 {
		t.Errorf("expected 2 labels, got %d", len(labels))
	}

	labels = seg.GetFilterLabels("")
	if len(labels) != 3 {
		t.Errorf("expected 3 labels (no filter), got %d", len(labels))
	}
}

// TestLabelComment checks that a label comment is parsed from a .data file and
// that an edited comment is written back.
func TestLabelComment(t *testing.T) {
	content := `[
{"Operator": "Test", "Duration": 60.0},
[10.0, 20.0, 0, 0, [{"species": "Kiwi", "certainty": 100, "filter": "M", "comment": "Good call"}]]
]`

	df, err := ParseDataFile(writeDataFixture(t, content))
	if err != nil {
		t.Fatal(err)
	}
	if len(df.Segments) != 1 || len(df.Segments[0].Labels) != 1 {
		t.Fatalf("expected 1 segment with 1 label, got %d segments", len(df.Segments))
	}
	if df.Segments[0].Labels[0].Comment != "Good call" {
		t.Errorf("expected Comment='Good call', got '%s'", df.Segments[0].Labels[0].Comment)
	}

	// Test writing comment
	df.Segments[0].Labels[0].Comment = "Updated comment"
	df2 := roundTripDataFile(t, df)

	if len(df2.Segments) != 1 || len(df2.Segments[0].Labels) != 1 {
		t.Fatalf("expected 1 segment with 1 label after round trip, got %d segments", len(df2.Segments))
	}
	if df2.Segments[0].Labels[0].Comment != "Updated comment" {
		t.Errorf("expected Comment='Updated comment', got '%s'", df2.Segments[0].Labels[0].Comment)
	}
}

// TestSkraakHashRoundTrip checks that skraak_hash in the metadata is preserved
// through a write/parse cycle.
func TestSkraakHashRoundTrip(t *testing.T) {
	df := &DataFile{
		Meta: &DataMeta{
			Operator: "Test",
			Duration: 60.0,
			Extra: map[string]any{
				"skraak_hash": "abc123def456",
			},
		},
		Segments: []*Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*Label{
					{Species: "Kiwi", Certainty: 100, Filter: "M"},
				},
			},
		},
	}

	df2 := roundTripDataFile(t, df)

	if df2.Meta.Extra == nil {
		t.Fatal("expected Extra to be non-nil")
	}
	hash, ok := df2.Meta.Extra["skraak_hash"].(string)
	if !ok {
		t.Fatal("expected skraak_hash to be string")
	}
	if hash != "abc123def456" {
		t.Errorf("expected skraak_hash=abc123def456, got %s", hash)
	}
}

// TestSkraakLabelIDRoundTrip checks that skraak_label_id on a label is
// preserved through a write/parse cycle.
func TestSkraakLabelIDRoundTrip(t *testing.T) {
	df := &DataFile{
		Meta: &DataMeta{
			Operator: "Test",
			Duration: 60.0,
		},
		Segments: []*Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*Label{
					{
						Species:   "Kiwi",
						Certainty: 100,
						Filter:    "M",
						Extra: map[string]any{
							"skraak_label_id": "label_abc123",
						},
					},
				},
			},
		},
	}

	df2 := roundTripDataFile(t, df)

	if len(df2.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df2.Segments))
	}
	if len(df2.Segments[0].Labels) != 1 {
		t.Fatalf("expected 1 label, got %d", len(df2.Segments[0].Labels))
	}

	label := df2.Segments[0].Labels[0]
	if label.Extra == nil {
		t.Fatal("expected label Extra to be non-nil")
	}
	labelID, ok := label.Extra["skraak_label_id"].(string)
	if !ok {
		t.Fatal("expected skraak_label_id to be string")
	}
	if labelID != "label_abc123" {
		t.Errorf("expected skraak_label_id=label_abc123, got %s", labelID)
	}
}

// TestSkraakFieldsBothPresent checks skraak_hash and skraak_label_id together.
func TestSkraakFieldsBothPresent(t *testing.T) {
	df := &DataFile{
		Meta: &DataMeta{
			Operator: "Test",
			Duration: 60.0,
			Extra: map[string]any{
				"skraak_hash": "file_hash_xyz",
			},
		},
		Segments: []*Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*Label{
					{
						Species:   "Kiwi",
						Certainty: 100,
						Filter:    "M",
						Extra: map[string]any{
							"skraak_label_id": "label_id_1",
						},
					},
					{
						Species:   "Roroa",
						Certainty: 90,
						Filter:    "M",
						Extra: map[string]any{
							"skraak_label_id": "label_id_2",
						},
					},
				},
			},
		},
	}

	df2 := roundTripDataFile(t, df)

	// Verify skraak_hash
	if df2.Meta.Extra["skraak_hash"] != "file_hash_xyz" {
		t.Errorf("expected skraak_hash=file_hash_xyz, got %v", df2.Meta.Extra["skraak_hash"])
	}

	// Verify both label IDs
	if len(df2.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df2.Segments))
	}
	if len(df2.Segments[0].Labels) != 2 {
		t.Fatalf("expected 2 labels, got %d", len(df2.Segments[0].Labels))
	}
	labelIDs := []string{"label_id_1", "label_id_2"}
	for i, label := range df2.Segments[0].Labels {
		if label.Extra["skraak_label_id"] != labelIDs[i] {
			t.Errorf("label %d: expected skraak_label_id=%s, got %v", i, labelIDs[i], label.Extra["skraak_label_id"])
		}
	}
}

func TestSegmentMatchesFilters(t *testing.T) {
	// Segment with one label carrying a calltype and one without.
	seg := &Segment{
		Labels: []*Label{
			{Species: "Kiwi", Filter: "model-1.0", CallType: "Duet", Certainty: 70},
			{Species: "Morepork", Filter: "model-2.0", CallType: "", Certainty: 100},
		},
	}

	tests := []struct {
		name      string
		filter    string
		species   string
		callType  string
		certainty int
		want      bool
	}{
		{"no filters", "", "", "", -1, true},
		{"filter only match", "model-1.0", "", "", -1, true},
		{"filter only no match", "model-3.0", "", "", -1, false},
		{"species only match", "", "Kiwi", "", -1, true},
		{"species only no match", "", "Tomtit", "", -1, false},
		{"calltype only match", "", "", "Duet", -1, true},
		{"calltype only no match", "", "", "Male", -1, false},
		{"certainty match", "", "", "", 70, true},
		{"certainty no match", "", "", "", 80, false},
		{"certainty 100 match", "", "", "", 100, true},
		{"filter+species match", "model-1.0", "Kiwi", "", -1, true},
		{"filter+species+calltype match", "model-1.0", "Kiwi", "Duet", -1, true},
		{"filter+species+calltype+certainty match", "model-1.0", "Kiwi", "Duet", 70, true},
		{"filter+species+calltype certainty miss", "model-1.0", "Kiwi", "Duet", 100, false},
		{"filter match species miss", "model-1.0", "Morepork", "", -1, false},
		{"all miss", "model-3.0", "Tomtit", "Male", -1, false},
		{"CallTypeNone matches empty calltype", "model-2.0", "Morepork", CallTypeNone, -1, true},
		{"CallTypeNone skips non-empty calltype", "model-1.0", "Kiwi", CallTypeNone, -1, false},
		{"CallTypeNone + certainty match", "model-2.0", "Morepork", CallTypeNone, 100, true},
		{"CallTypeNone + certainty miss", "model-2.0", "Morepork", CallTypeNone, 70, false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := seg.SegmentMatchesFilters(tt.filter, tt.species, tt.callType, tt.certainty)
			if got != tt.want {
				t.Errorf("SegmentMatchesFilters(%q, %q, %q, %d) = %v, want %v",
					tt.filter, tt.species, tt.callType, tt.certainty, got, tt.want)
			}
		})
	}
}

func TestParseSpeciesCallType(t *testing.T) {
	tests := []struct {
		input    string
		species  string
		callType string
	}{
		{"", "", ""},
		{"Kiwi", "Kiwi", ""},
		{"Kiwi+Duet", "Kiwi", "Duet"},
		{"GSK+Female", "GSK", "Female"},
		{"Species+With+Multiple+Plus", "Species", "With+Multiple+Plus"},
		{"Kiwi+_", "Kiwi", "_"},
	}

	for _, tt := range tests {
		t.Run(tt.input, func(t *testing.T) {
			species, callType := ParseSpeciesCallType(tt.input)
			if species != tt.species || callType != tt.callType {
				t.Errorf("ParseSpeciesCallType(%q) = (%q, %q), want (%q, %q)",
					tt.input, species, callType, tt.species, tt.callType)
			}
		})
	}
}
package utils

import (
	"encoding/json"
	"fmt"
	"maps"
	"os"
	"sort"
	"strings"
)

// CallTypeNone is a sentinel value used in --species Species+_ to match
// only labels with an empty calltype (e.g., "Kiwi+_").
const CallTypeNone = "_"

// DataFile represents an AviaNZ .data file.
type DataFile struct {
	Meta     *DataMeta
	Segments []*Segment
	FilePath string
}

// DataMeta contains metadata for a .data file.
type DataMeta struct {
	Operator string
	Reviewer string
	Duration float64
	Extra    map[string]any // preserve unknown fields
}

// Segment represents a detection segment.
type Segment struct {
	StartTime float64
	EndTime   float64
	FreqLow   float64
	FreqHigh  float64
	Labels    []*Label
}

// Label represents a species label within a segment.
type Label struct {
	Species   string
	Certainty int
	Filter    string
	CallType  string
	Comment   string         // user comment (max 140 chars, ASCII only)
	Bookmark  bool           // user bookmark for navigation
	Extra     map[string]any // preserve unknown fields
}

// ParseDataFile reads and parses a .data file.
//
// The file is a JSON array whose first element is the metadata object and
// whose remaining elements are segment arrays. Segments that cannot be parsed
// are skipped. The returned segments are sorted by start time; segments with
// equal start times keep their order from the file.
//
// An error is returned when the file cannot be read, is not a JSON array, or
// is an empty array.
func ParseDataFile(path string) (*DataFile, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	var raw []json.RawMessage
	if err := json.Unmarshal(data, &raw); err != nil {
		return nil, fmt.Errorf("parse JSON: %w", err)
	}
	if len(raw) == 0 {
		return nil, fmt.Errorf("empty .data file")
	}

	df := &DataFile{
		FilePath: path,
		Meta:     parseMeta(raw[0]), // metadata is the first element
		Segments: make([]*Segment, 0, len(raw)-1),
	}

	for _, rawSeg := range raw[1:] {
		seg, err := parseSegment(rawSeg)
		if err != nil {
			continue // skip invalid segments
		}
		df.Segments = append(df.Segments, seg)
	}

	// Stable sort so that ties keep file order across a parse/write cycle.
	sort.SliceStable(df.Segments, func(i, j int) bool {
		return df.Segments[i].StartTime < df.Segments[j].StartTime
	})

	return df, nil
}

// parseMeta parses the metadata object.
//
// Operator, Reviewer and Duration are extracted when they have the expected
// JSON type; every other entry (including a known key with an unexpected type,
// such as "Reviewer": null) is kept in Extra. If raw is not a JSON object an
// empty DataMeta is returned.
func parseMeta(raw json.RawMessage) *DataMeta {
	var obj map[string]any
	if err := json.Unmarshal(raw, &obj); err != nil {
		return &DataMeta{}
	}

	meta := &DataMeta{Extra: make(map[string]any)}

	if v, ok := obj["Operator"].(string); ok {
		meta.Operator = v
		delete(obj, "Operator")
	}
	if v, ok := obj["Reviewer"].(string); ok {
		meta.Reviewer = v
		delete(obj, "Reviewer")
	}
	if v, ok := obj["Duration"].(float64); ok {
		meta.Duration = v
		delete(obj, "Duration")
	}

	// Store remaining fields.
	maps.Copy(meta.Extra, obj)

	return meta
}

// parseSegment parses a segment array of the form
// [start, end, freqLow, freqHigh, labels].
//
// It returns an error when raw is not a JSON array or has fewer than five
// elements. Numeric elements that fail to decode leave the corresponding field
// at zero, and a labels element that is not an array yields no labels. Labels
// are sorted alphabetically by species, ties keeping their original order.
func parseSegment(raw json.RawMessage) (*Segment, error) {
	var arr []json.RawMessage
	if err := json.Unmarshal(raw, &arr); err != nil {
		return nil, err
	}
	if len(arr) < 5 {
		return nil, fmt.Errorf("segment too short")
	}

	seg := &Segment{}

	// Parse time and frequency.
	if v, err := parseFloat(arr[0]); err == nil {
		seg.StartTime = v
	}
	if v, err := parseFloat(arr[1]); err == nil {
		seg.EndTime = v
	}
	if v, err := parseFloat(arr[2]); err == nil {
		seg.FreqLow = v
	}
	if v, err := parseFloat(arr[3]); err == nil {
		seg.FreqHigh = v
	}

	// Parse labels.
	var labelArr []json.RawMessage
	if err := json.Unmarshal(arr[4], &labelArr); err == nil {
		for _, labelRaw := range labelArr {
			if label := parseLabel(labelRaw); label != nil {
				seg.Labels = append(seg.Labels, label)
			}
		}
	}

	sort.SliceStable(seg.Labels, func(i, j int) bool {
		return seg.Labels[i].Species < seg.Labels[j].Species
	})

	return seg, nil
}

// parseLabel parses a label object, returning nil when raw is not a JSON
// object. Known keys with the expected JSON type populate the typed fields;
// everything else is kept in Extra.
func parseLabel(raw json.RawMessage) *Label {
	var obj map[string]any
	if err := json.Unmarshal(raw, &obj); err != nil {
		return nil
	}

	label := &Label{Extra: make(map[string]any)}

	if v, ok := obj["species"].(string); ok {
		label.Species = v
		delete(obj, "species")
	}
	if v, ok := obj["certainty"].(float64); ok {
		label.Certainty = int(v)
		delete(obj, "certainty")
	}
	if v, ok := obj["filter"].(string); ok {
		label.Filter = v
		delete(obj, "filter")
	}
	if v, ok := obj["calltype"].(string); ok {
		label.CallType = v
		delete(obj, "calltype")
	}
	if v, ok := obj["comment"].(string); ok {
		label.Comment = v
		delete(obj, "comment")
	}
	if v, ok := obj["bookmark"].(bool); ok {
		label.Bookmark = v
		delete(obj, "bookmark")
	}

	// Store remaining fields.
	maps.Copy(label.Extra, obj)

	return label
}

// parseFloat extracts a float from JSON.
func parseFloat(raw json.RawMessage) (float64, error) {
	var v float64
	err := json.Unmarshal(raw, &v)
	return v, err
}

// Write serializes the DataFile as indented JSON and writes it to path.
//
// Unknown fields held in Extra are written back alongside the typed fields.
// When Extra still holds an entry under a known key (for example a
// "Reviewer": null left over from parsing), a typed field that has been set
// takes precedence over it. A nil Meta is written as an empty metadata object.
func (df *DataFile) Write(path string) error {
	raw := make([]any, 0, len(df.Segments)+1)
	raw = append(raw, metaToJSON(df.Meta))

	for _, seg := range df.Segments {
		// Non-nil slice so that a segment without labels encodes as [].
		labels := make([]any, 0, len(seg.Labels))
		for _, label := range seg.Labels {
			labels = append(labels, labelToJSON(label))
		}
		raw = append(raw, []any{
			seg.StartTime,
			seg.EndTime,
			seg.FreqLow,
			seg.FreqHigh,
			labels,
		})
	}

	data, err := json.MarshalIndent(raw, "", " ")
	if err != nil {
		return err
	}

	return os.WriteFile(path, data, 0644)
}

// metaToJSON builds the JSON object for the metadata element. Extra is copied
// first so that typed fields which are set override same-named leftovers.
func metaToJSON(m *DataMeta) map[string]any {
	if m == nil {
		return map[string]any{}
	}
	obj := make(map[string]any, len(m.Extra)+3)
	maps.Copy(obj, m.Extra)
	if m.Operator != "" {
		obj["Operator"] = m.Operator
	}
	if m.Reviewer != "" {
		obj["Reviewer"] = m.Reviewer
	}
	if m.Duration > 0 {
		obj["Duration"] = m.Duration
	}
	return obj
}

// labelToJSON builds the JSON object for one label. "species" and "certainty"
// are always present; the optional fields are written only when set. Extra is
// copied first so that typed fields which are set override same-named
// leftovers, while a zero typed value does not clobber a preserved entry.
func labelToJSON(label *Label) map[string]any {
	obj := make(map[string]any, len(label.Extra)+6)
	maps.Copy(obj, label.Extra)
	if _, carried := obj["species"]; !carried || label.Species != "" {
		obj["species"] = label.Species
	}
	if _, carried := obj["certainty"]; !carried || label.Certainty != 0 {
		obj["certainty"] = label.Certainty
	}
	if label.Filter != "" {
		obj["filter"] = label.Filter
	}
	if label.CallType != "" {
		obj["calltype"] = label.CallType
	}
	if label.Comment != "" {
		obj["comment"] = label.Comment
	}
	if label.Bookmark {
		obj["bookmark"] = true
	}
	return obj
}

// HasFilterLabel reports whether the segment has a label matching the filter.
// An empty filter matches every segment, including one without labels.
func (s *Segment) HasFilterLabel(filter string) bool {
	if filter == "" {
		return true
	}
	for _, label := range s.Labels {
		if label.Filter == filter {
			return true
		}
	}
	return false
}

// GetFilterLabels returns the labels matching the filter, in segment order.
// An empty filter returns all labels.
func (s *Segment) GetFilterLabels(filter string) []*Label {
	var result []*Label
	for _, label := range s.Labels {
		if filter == "" || label.Filter == filter {
			result = append(result, label)
		}
	}
	return result
}

// SegmentMatchesFilters returns true if the segment has any label matching all filter criteria.
// All non-empty/non-negative parameters must match for a label to be considered a match.
// Use certainty=-1 to indicate no certainty filtering (since 0 is a valid certainty value).
// Use "_" (CallTypeNone) as the calltype to match only labels with no calltype.
// With no criteria at all, every segment matches, including one without labels.
func (s *Segment) SegmentMatchesFilters(filter, species, callType string, certainty int) bool {
	if filter == "" && species == "" && callType == "" && certainty < 0 {
		return true // No filters, match all
	}
	for _, label := range s.Labels {
		if labelMatchesFilters(label, filter, species, callType, certainty) {
			return true
		}
	}
	return false
}

// labelMatchesFilters checks if a single label matches all filter criteria.
// Empty strings and a negative certainty mean "do not filter on this field";
// callType == CallTypeNone requires the label's calltype to be empty.
func labelMatchesFilters(label *Label, filter, species, callType string, certainty int) bool {
	if filter != "" && label.Filter != filter {
		return false
	}
	if species != "" && label.Species != species {
		return false
	}
	switch {
	case callType == CallTypeNone:
		if label.CallType != "" {
			return false
		}
	case callType != "" && label.CallType != callType:
		return false
	}
	if certainty >= 0 && label.Certainty != certainty {
		return false
	}
	return true
}

// ParseSpeciesCallType parses a species string with optional calltype into separate values.
// Format: "Species" or "Species+CallType" (e.g., "Kiwi" or "Kiwi+Duet").
// Only the first "+" separates the two parts; later ones stay in the calltype.
func ParseSpeciesCallType(label string) (species, callType string) {
	if before, after, ok := strings.Cut(label, "+"); ok {
		return before, after
	}
	return label, ""
}

// FindDataFiles finds all .data files in a folder, ignoring hidden files (starting with ".").
// The search is not recursive.
func FindDataFiles(folder string) ([]string, error) {
	return FindFiles(folder, FindFilesOptions{
		Extension:  ".data",
		Recursive:  false,
		SkipHidden: true,
	})
}
package utils

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
)

// ~/.skraak/config.json schema (reference):
//
//	{
//	  "classify": {
//	    "reviewer": "string, required. Name stamped into .data file meta on any edit.",
//	    "color": "bool, optional. Colored spectrograms in the TUI. Default false.",
//	    "sixel": "bool, optional. Use sixel image protocol. Default false (Kitty).",
//	    "iterm": "bool, optional. Use iTerm inline-image protocol. Default false.",
//	    "img_dims": "int, optional. Spectrogram display size in pixels. 0 = default.",
//
//	    "bindings": {
//	      "<key>": "Species"           // e.g. "c": "comcha"
//	      "<key>": "Species+CallType"  // e.g. "1": "Kiwi+Duet"
//	      // <key> is a single character. Reserved: ",", ".", "0", " " (space).
//	      // Pressing <key> labels the current segment (certainty 100, or 0 for
//	      // "Don't Know"), saves, and advances.
//	    },
//
//	    "secondary_bindings": {
//	      "<primary-key>": {
//	        "<key>": "CallType"  // e.g. "a": "alarm"
//	        // <key> is a single character, same reserved-key rules as bindings.
//	        // Outer <primary-key> must also exist in "bindings".
//	      }
//	      // Optional. Invoked via Shift+<primary-key>: labels the species with
//	      // an empty calltype, does NOT advance, and waits for one follow-up
//	      // key looked up in this inner map. Match -> set calltype, save,
//	      // advance. Esc -> exit wait mode without advancing. Any other key ->
//	      // exit wait mode and handle the key normally.
//	      // Shift+<primary-key> on a primary without a secondary_bindings entry
//	      // falls back to normal primary behavior.
//	    }
//	  }
//	}
//
// Example:
//
//	{
//	  "classify": {
//	    "reviewer": "David",
//	    "color": true,
//	    "bindings": {
//	      "c": "comcha",
//	      "k": "kea1",
//	      "x": "Noise",
//	      "z": "Don't Know",
//	      "1": "Kiwi+Duet",
//	      "4": "Kiwi"
//	    },
//	    "secondary_bindings": {
//	      "c": { "a": "alarm", "s": "song", "n": "contact" }
//	    }
//	  }
//	}

// Config holds user-level defaults loaded from ~/.skraak/config.json.
// Per-subcommand sections live as named fields.
type Config struct {
	Classify ClassifyFileConfig `json:"classify"`
}

// ClassifyFileConfig holds defaults for `skraak calls classify`.
// Bindings maps a single-character key to "Species" or "Species+CallType".
type ClassifyFileConfig struct {
	Reviewer string            `json:"reviewer"`
	Color    bool              `json:"color"`
	Sixel    bool              `json:"sixel"`
	ITerm    bool              `json:"iterm"`
	ImgDims  int               `json:"img_dims"`
	Bindings map[string]string `json:"bindings"`
	// SecondaryBindings extends a primary binding with per-species calltype
	// choices. Outer key is the primary binding key; inner map is
	// single-char key -> calltype string. Invoked via Shift+primary-key.
	SecondaryBindings map[string]map[string]string `json:"secondary_bindings,omitempty"`
}

// ConfigPath returns the absolute path to ~/.skraak/config.json.
// It fails only when the user's home directory cannot be resolved.
func ConfigPath() (string, error) {
	home, err := os.UserHomeDir()
	if err != nil {
		return "", fmt.Errorf("resolving home directory: %w", err)
	}
	return filepath.Join(home, ".skraak", "config.json"), nil
}

// LoadConfig reads ~/.skraak/config.json and returns the parsed config and the
// resolved path (useful for error messages).
//
// On any error the returned Config is the zero value. The path is returned
// whenever it could be resolved, even if reading or parsing the file failed.
// This function does not validate the binding rules described in the schema
// reference above; it only decodes the JSON.
func LoadConfig() (Config, string, error) {
	path, err := ConfigPath()
	if err != nil {
		return Config{}, "", err
	}

	data, err := os.ReadFile(path)
	if err != nil {
		return Config{}, path, fmt.Errorf("reading %s: %w", path, err)
	}

	var cfg Config
	if err := json.Unmarshal(data, &cfg); err != nil {
		// json.Unmarshal keeps decoding past a type mismatch, so cfg may be
		// half-filled here; never hand that to the caller.
		return Config{}, path, fmt.Errorf("parsing %s: %w", path, err)
	}
	return cfg, path, nil
}
package utils

import (
	"math"
	"testing"
)

// Reference values verified against opensoundscape.utils.generate_clip_times_df
// at https://github.com/kitzeslab/opensoundscape/blob/master/opensoundscape/utils.py

// TestGenerateClipTimes_FullModeBasic checks that "full" mode shifts the last
// window back so that it ends exactly at full_duration.
func TestGenerateClipTimes_FullModeBasic(t *testing.T) {
	// full_duration=10, clip_duration=4, overlap=0.5, final="full"
	// increment = 3.5
	// raw starts: 0, 3.5, 7 (next would be 10.5 ≥ 10)
	// raw ends:   4, 7.5, 11
	// "full": last clip start shifts back by (11-10)=1 → start=6, end=10
	// → [(0,4), (3.5,7.5), (6,10)]
	got, err := GenerateClipTimes(10, 4, 0.5, FinalClipFull, 10)
	if err != nil {
		t.Fatal(err)
	}
	want := []ClipWindow{{0, 4}, {3.5, 7.5}, {6, 10}}
	assertClips(t, got, want)
}

// TestGenerateClipTimes_NoneMode checks that "none" mode drops every window
// that would run past full_duration.
func TestGenerateClipTimes_NoneMode(t *testing.T) {
	// final="none": drop any clip whose end > full_duration.
	// full=10, dur=4, overlap=0: starts 0,4,8; ends 4,8,12 → keep (0,4),(4,8)
	got, err := GenerateClipTimes(10, 4, 0, FinalClipNone, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 4}, {4, 8}})
}

// TestGenerateClipTimes_RemainderMode checks that "remainder" mode trims the
// overrunning window instead of dropping it.
func TestGenerateClipTimes_RemainderMode(t *testing.T) {
	// full=10, dur=4, overlap=0: starts 0,4,8; ends 4,8,12
	// remainder: trim 12 → 10. → (0,4),(4,8),(8,10)
	got, err := GenerateClipTimes(10, 4, 0, FinalClipRemainder, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 4}, {4, 8}, {8, 10}})
}

// TestGenerateClipTimes_ExtendMode checks that "extend" mode keeps the final
// window even though it ends after full_duration.
func TestGenerateClipTimes_ExtendMode(t *testing.T) {
	got, err := GenerateClipTimes(10, 4, 0, FinalClipExtend, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 4}, {4, 8}, {8, 12}})
}

// TestGenerateClipTimes_AudioShorterThanClip covers a recording shorter than
// one clip in "full" mode.
func TestGenerateClipTimes_AudioShorterThanClip(t *testing.T) {
	// full=2, dur=4, overlap=0, final="full":
	// raw start=0, end=4; end > full=2 → start shifts to 0-(4-2)=-2 → clamped to 0;
	// end=2 → single clip (0,2)
	got, err := GenerateClipTimes(2, 4, 0, FinalClipFull, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 2}})
}

// TestGenerateClipTimes_DedupAfterFullShift covers both sides of the
// de-duplication step in "full" mode: an input where nothing is shifted, and
// an input where the shifted final window repeats the previous one.
func TestGenerateClipTimes_DedupAfterFullShift(t *testing.T) {
	// No-duplicate path: full=8, dur=4, overlap=0.
	// raw starts 0,4; ends 4,8 — no shift needed; output (0,4),(4,8).
	got, err := GenerateClipTimes(8, 4, 0, FinalClipFull, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 4}, {4, 8}})

	// Duplicate path: full=6, dur=4, overlap=2 → increment=2.
	// raw starts 0,2,4; ends 4,6,8. The last window is shifted back by
	// (8-6)=2 to (2,6), which repeats the second window and must be dropped.
	// This expectation follows from the de-duplication GenerateClipTimes
	// performs on its result; it has not been independently checked against
	// OPSO.
	got, err = GenerateClipTimes(6, 4, 2, FinalClipFull, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 4}, {2, 6}})
}

// TestGenerateClipTimes_InvalidArgs checks that each out-of-range argument is
// rejected with an error.
func TestGenerateClipTimes_InvalidArgs(t *testing.T) {
	_, err := GenerateClipTimes(10, 0, 0, FinalClipFull, 10)
	if err == nil {
		t.Error("expected error for clip_duration=0")
	}
	_, err = GenerateClipTimes(10, 4, 4, FinalClipFull, 10)
	if err == nil {
		t.Error("expected error for clip_overlap >= clip_duration")
	}
	_, err = GenerateClipTimes(0, 4, 0, FinalClipFull, 10)
	if err == nil {
		t.Error("expected error for full_duration=0")
	}
}

// assertClips fails the test when got and want differ in length, or when any
// Start/End pair differs by more than 1e-9.
func assertClips(t *testing.T, got, want []ClipWindow) {
	t.Helper()
	if len(got) != len(want) {
		t.Fatalf("len(got)=%d, len(want)=%d\ngot=%v\nwant=%v", len(got), len(want), got, want)
	}
	for i := range got {
		if math.Abs(got[i].Start-want[i].Start) > 1e-9 || math.Abs(got[i].End-want[i].End) > 1e-9 {
			t.Errorf("clip %d: got (%v,%v), want (%v,%v)", i, got[i].Start, got[i].End, want[i].Start, want[i].End)
		}
	}
}

// TestParseFinalClipMode checks every accepted flag value, the empty-string
// default, and rejection of unknown or differently-cased values.
func TestParseFinalClipMode(t *testing.T) {
	tests := []struct {
		input string
		want  FinalClipMode
		err   bool
	}{
		{"none", FinalClipNone, false},
		{"", FinalClipNone, false},
		{"remainder", FinalClipRemainder, false},
		{"full", FinalClipFull, false},
		{"extend", FinalClipExtend, false},
		{"invalid", 0, true},
		{"FULL", 0, true}, // case-sensitive
	}
	for _, tt := range tests {
		t.Run(tt.input, func(t *testing.T) {
			got, err := ParseFinalClipMode(tt.input)
			if tt.err {
				if err == nil {
					t.Error("expected error")
				}
				return
			}
			if err != nil {
				t.Errorf("unexpected error: %v", err)
			}
			if got != tt.want {
				t.Errorf("got %d, want %d", got, tt.want)
			}
		})
	}
}
package utils

import (
	"fmt"
	"math"
)

// ClipWindow is one (Start, End) time window within a single audio file.
// Both bounds are in the same unit as the durations given to GenerateClipTimes.
type ClipWindow struct {
	Start float64
	End   float64
}

// FinalClipMode controls how the trailing partial clip is handled.
// Mirrors opensoundscape.utils.generate_clip_times_df:
//   - FinalClipNone: discard any clip whose end exceeds full_duration
//   - FinalClipRemainder: trim the final clip's end to full_duration (shorter clip)
//   - FinalClipFull: shift the final clip's start back so its end equals full_duration
//   - FinalClipExtend: keep the final clip extending beyond full_duration
type FinalClipMode int

const (
	FinalClipNone FinalClipMode = iota
	FinalClipRemainder
	FinalClipFull
	FinalClipExtend
)

// ParseFinalClipMode parses a CLI flag value. The empty string is accepted as
// "none"; matching is case-sensitive. On an unknown value it returns an error
// and the zero mode.
func ParseFinalClipMode(s string) (FinalClipMode, error) {
	switch s {
	case "none", "":
		return FinalClipNone, nil
	case "remainder":
		return FinalClipRemainder, nil
	case "full":
		return FinalClipFull, nil
	case "extend":
		return FinalClipExtend, nil
	default:
		return 0, fmt.Errorf("invalid final-clip mode %q (want one of: none, remainder, full, extend)", s)
	}
}

// roundTo rounds x to `precision` decimal places, in the manner of numpy.round.
// Pass precision < 0 to skip rounding.
//
// NOTE(review): math.Round breaks exact ties away from zero, whereas
// numpy.round breaks them to even; the two only disagree when x*10^precision
// lands exactly on .5 — confirm whether that matters for callers.
func roundTo(x float64, precision int) float64 {
	if precision < 0 {
		return x
	}
	scale := math.Pow(10, float64(precision))
	return math.Round(x*scale) / scale
}

// GenerateClipTimes ports opensoundscape.utils.generate_clip_times_df.
//
// Args mirror the Python signature: clipDuration > 0, clipOverlap in [0, clipDuration),
// fullDuration > 0. roundingPrecision defaults to 10 in OPSO; pass -1 to skip rounding.
// An error is returned when any of those ranges is violated or when finalClip
// is not one of the declared modes.
//
// Result is the list of (start, end) windows for one audio file, with duplicates
// removed (which can happen under FinalClipFull when the shifted final clip
// coincides with the previous one).
func GenerateClipTimes(fullDuration, clipDuration, clipOverlap float64, finalClip FinalClipMode, roundingPrecision int) ([]ClipWindow, error) {
	if clipDuration <= 0 {
		return nil, fmt.Errorf("clipDuration must be > 0, got %v", clipDuration)
	}
	if clipOverlap < 0 || clipOverlap >= clipDuration {
		return nil, fmt.Errorf("clipOverlap must be in [0, clipDuration), got %v with clipDuration=%v", clipOverlap, clipDuration)
	}
	if fullDuration <= 0 {
		return nil, fmt.Errorf("fullDuration must be > 0, got %v", fullDuration)
	}

	starts, ends := buildClipStartsEnds(fullDuration, clipDuration, clipOverlap, roundingPrecision)

	var windows []ClipWindow
	switch finalClip {
	case FinalClipNone:
		windows = clipWindowsNone(starts, ends, fullDuration)
	case FinalClipRemainder:
		windows = clipWindowsRemainder(starts, ends, fullDuration)
	case FinalClipFull:
		windows = clipWindowsFull(starts, ends, fullDuration)
	case FinalClipExtend:
		windows = clipWindowsExtend(starts, ends)
	default:
		return nil, fmt.Errorf("invalid FinalClipMode %d", finalClip)
	}
	return dedupClips(windows), nil
}

// buildClipStartsEnds generates the parallel start and end arrays for clips.
// Starts are 0, increment, 2*increment, ... while strictly below fullDuration,
// each rounded with roundTo; every end is its rounded start plus clipDuration.
func buildClipStartsEnds(fullDuration, clipDuration, clipOverlap float64, roundingPrecision int) ([]float64, []float64) {
	increment := clipDuration - clipOverlap

	var starts []float64
	// Each start is computed as i*increment rather than by repeated addition:
	// accumulating `s += increment` drifts (ten additions of 0.1 give
	// 0.9999999999999999, not 1), which used to emit a spurious extra start
	// sitting on fullDuration.
	for i, s := 0, 0.0; s < fullDuration; i, s = i+1, float64(i+1)*increment {
		starts = append(starts, roundTo(s, roundingPrecision))
	}
	if len(starts) == 0 {
		// Only reachable when called with a non-positive (or NaN) fullDuration.
		starts = []float64{0}
	}

	ends := make([]float64, len(starts))
	for i, s := range starts {
		ends[i] = s + clipDuration
	}
	return starts, ends
}

// clipWindowsNone drops any window whose end exceeds fullDuration.
func clipWindowsNone(starts, ends []float64, fullDuration float64) []ClipWindow {
	out := make([]ClipWindow, 0, len(starts))
	for i := range starts {
		if ends[i] <= fullDuration {
			out = append(out, ClipWindow{Start: starts[i], End: ends[i]})
		}
	}
	return out
}

// clipWindowsRemainder trims ends beyond fullDuration down to fullDuration.
func clipWindowsRemainder(starts, ends []float64, fullDuration float64) []ClipWindow {
	out := make([]ClipWindow, 0, len(starts))
	for i := range starts {
		e := ends[i]
		if e > fullDuration {
			e = fullDuration
		}
		out = append(out, ClipWindow{Start: starts[i], End: e})
	}
	return out
}

// clipWindowsFull shifts windows whose end exceeds fullDuration back so end == fullDuration.
// A start that would become negative is clamped to 0.
func clipWindowsFull(starts, ends []float64, fullDuration float64) []ClipWindow {
	out := make([]ClipWindow, 0, len(starts))
	for i := range starts {
		s, e := starts[i], ends[i]
		if e > fullDuration {
			s -= e - fullDuration
			e = fullDuration
			if s < 0 {
				s = 0
			}
		}
		out = append(out, ClipWindow{Start: s, End: e})
	}
	return out
}

// clipWindowsExtend keeps ends as-is, even past fullDuration.
func clipWindowsExtend(starts, ends []float64) []ClipWindow {
	out := make([]ClipWindow, 0, len(starts))
	for i := range starts {
		out = append(out, ClipWindow{Start: starts[i], End: ends[i]})
	}
	return out
}

// dedupClips removes every repeated window (not only adjacent repeats),
// keeping the first occurrence and preserving order. Windows are compared by
// exact Start/End equality. Stands in for the pandas
// DataFrame.drop_duplicates() call at the end of OPSO's generate_clip_times_df.
func dedupClips(in []ClipWindow) []ClipWindow {
	if len(in) <= 1 {
		return in
	}
	seen := make(map[ClipWindow]bool, len(in))
	out := make([]ClipWindow, 0, len(in))
	for _, c := range in {
		if !seen[c] {
			seen[c] = true
			out = append(out, c)
		}
	}
	return out
}
// MappingQuerier is the minimal read-only query surface that mapping
// validation depends on. According to the original note it is satisfied by
// *sql.DB, *sql.Tx, and *db.LoggedTx.
type MappingQuerier interface {
	// QueryRowContext runs a query for which a single *sql.Row is returned.
	QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row
	// QueryContext runs a query and returns its result set as *sql.Rows,
	// or an error.
	QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error)
}
query := `SELECT label FROM species WHERE label IN (` + utils.Placeholders(len(speciesLabels)) + `) AND active = true`
query := `SELECT label FROM species WHERE label IN (` + db.Placeholders(len(speciesLabels)) + `) AND active = true`
func validateMappedCalltypes(queryer MappingQuerier, mappedCalltypes map[string]map[string]string, result *mapping.ValidationResult) error {
func validateMappedCalltypes(queryer Reader, mappedCalltypes map[string]map[string]string, result *mapping.ValidationResult) error {
"file1": {Segments: []*utils.Segment{{}, {}}},"file2": {Segments: []*utils.Segment{{}}},"file3": {Segments: []*utils.Segment{{}, {}, {}, {}}},
"file1": {Segments: []*datafile.Segment{{}, {}}},"file2": {Segments: []*datafile.Segment{{}}},"file3": {Segments: []*datafile.Segment{{}, {}, {}, {}}},
package calls

import (
	"math"
	"testing"
)

// Reference values verified against opensoundscape.utils.generate_clip_times_df
// at https://github.com/kitzeslab/opensoundscape/blob/master/opensoundscape/utils.py

// TestGenerateClipTimes_FullModeBasic checks that "full" mode shifts the last
// window back so that it ends exactly at full_duration.
func TestGenerateClipTimes_FullModeBasic(t *testing.T) {
	// full_duration=10, clip_duration=4, overlap=0.5, final="full"
	// increment = 3.5
	// raw starts: 0, 3.5, 7 (next would be 10.5 ≥ 10)
	// raw ends:   4, 7.5, 11
	// "full": last clip start shifts back by (11-10)=1 → start=6, end=10
	// → [(0,4), (3.5,7.5), (6,10)]
	got, err := GenerateClipTimes(10, 4, 0.5, FinalClipFull, 10)
	if err != nil {
		t.Fatal(err)
	}
	want := []ClipWindow{{0, 4}, {3.5, 7.5}, {6, 10}}
	assertClips(t, got, want)
}

// TestGenerateClipTimes_NoneMode checks that "none" mode drops every window
// that would run past full_duration.
func TestGenerateClipTimes_NoneMode(t *testing.T) {
	// final="none": drop any clip whose end > full_duration.
	// full=10, dur=4, overlap=0: starts 0,4,8; ends 4,8,12 → keep (0,4),(4,8)
	got, err := GenerateClipTimes(10, 4, 0, FinalClipNone, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 4}, {4, 8}})
}

// TestGenerateClipTimes_RemainderMode checks that "remainder" mode trims the
// overrunning window instead of dropping it.
func TestGenerateClipTimes_RemainderMode(t *testing.T) {
	// full=10, dur=4, overlap=0: starts 0,4,8; ends 4,8,12
	// remainder: trim 12 → 10. → (0,4),(4,8),(8,10)
	got, err := GenerateClipTimes(10, 4, 0, FinalClipRemainder, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 4}, {4, 8}, {8, 10}})
}

// TestGenerateClipTimes_ExtendMode checks that "extend" mode keeps the final
// window even though it ends after full_duration.
func TestGenerateClipTimes_ExtendMode(t *testing.T) {
	got, err := GenerateClipTimes(10, 4, 0, FinalClipExtend, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 4}, {4, 8}, {8, 12}})
}

// TestGenerateClipTimes_AudioShorterThanClip covers a recording shorter than
// one clip in "full" mode.
func TestGenerateClipTimes_AudioShorterThanClip(t *testing.T) {
	// full=2, dur=4, overlap=0, final="full":
	// raw start=0, end=4; end > full=2 → start shifts to 0-(4-2)=-2 → clamped to 0;
	// end=2 → single clip (0,2)
	got, err := GenerateClipTimes(2, 4, 0, FinalClipFull, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 2}})
}

// TestGenerateClipTimes_DedupAfterFullShift covers both sides of the
// de-duplication step in "full" mode: an input where nothing is shifted, and
// an input where the shifted final window repeats the previous one.
func TestGenerateClipTimes_DedupAfterFullShift(t *testing.T) {
	// No-duplicate path: full=8, dur=4, overlap=0.
	// raw starts 0,4; ends 4,8 — no shift needed; output (0,4),(4,8).
	got, err := GenerateClipTimes(8, 4, 0, FinalClipFull, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 4}, {4, 8}})

	// Duplicate path: full=6, dur=4, overlap=2 → increment=2.
	// raw starts 0,2,4; ends 4,6,8. The last window is shifted back by
	// (8-6)=2 to (2,6), which repeats the second window and must be dropped.
	// This expectation follows from the de-duplication GenerateClipTimes
	// performs on its result; it has not been independently checked against
	// OPSO.
	got, err = GenerateClipTimes(6, 4, 2, FinalClipFull, 10)
	if err != nil {
		t.Fatal(err)
	}
	assertClips(t, got, []ClipWindow{{0, 4}, {2, 6}})
}

// TestGenerateClipTimes_InvalidArgs checks that each out-of-range argument is
// rejected with an error.
func TestGenerateClipTimes_InvalidArgs(t *testing.T) {
	_, err := GenerateClipTimes(10, 0, 0, FinalClipFull, 10)
	if err == nil {
		t.Error("expected error for clip_duration=0")
	}
	_, err = GenerateClipTimes(10, 4, 4, FinalClipFull, 10)
	if err == nil {
		t.Error("expected error for clip_overlap >= clip_duration")
	}
	_, err = GenerateClipTimes(0, 4, 0, FinalClipFull, 10)
	if err == nil {
		t.Error("expected error for full_duration=0")
	}
}

// TestParseFinalClipMode checks every accepted flag value, the empty-string
// default, and rejection of unknown or differently-cased values.
func TestParseFinalClipMode(t *testing.T) {
	tests := []struct {
		input string
		want  FinalClipMode
		err   bool
	}{
		{"none", FinalClipNone, false},
		{"", FinalClipNone, false},
		{"remainder", FinalClipRemainder, false},
		{"full", FinalClipFull, false},
		{"extend", FinalClipExtend, false},
		{"invalid", 0, true},
		{"FULL", 0, true}, // case-sensitive
	}
	for _, tt := range tests {
		t.Run(tt.input, func(t *testing.T) {
			got, err := ParseFinalClipMode(tt.input)
			if tt.err {
				if err == nil {
					t.Error("expected error")
				}
				return
			}
			if err != nil {
				t.Errorf("unexpected error: %v", err)
			}
			if got != tt.want {
				t.Errorf("got %d, want %d", got, tt.want)
			}
		})
	}
}

// assertClips fails the test when got and want differ in length, or when any
// Start/End pair differs by more than 1e-9.
func assertClips(t *testing.T, got, want []ClipWindow) {
	t.Helper()
	if len(got) != len(want) {
		t.Fatalf("len(got)=%d, len(want)=%d\ngot=%v\nwant=%v", len(got), len(want), got, want)
	}
	for i := range got {
		if math.Abs(got[i].Start-want[i].Start) > 1e-9 || math.Abs(got[i].End-want[i].End) > 1e-9 {
			t.Errorf("clip %d: got (%v,%v), want (%v,%v)", i, got[i].Start, got[i].End, want[i].Start, want[i].End)
		}
	}
}
package calls

import (
	"fmt"
	"math"
)

// ClipWindow is one (Start, End) time window within a single audio file.
// Both bounds are in the same unit as the durations given to GenerateClipTimes.
type ClipWindow struct {
	Start float64
	End   float64
}

// FinalClipMode controls how the trailing partial clip is handled.
// Mirrors opensoundscape.utils.generate_clip_times_df:
//   - FinalClipNone: discard any clip whose end exceeds full_duration
//   - FinalClipRemainder: trim the final clip's end to full_duration (shorter clip)
//   - FinalClipFull: shift the final clip's start back so its end equals full_duration
//   - FinalClipExtend: keep the final clip extending beyond full_duration
type FinalClipMode int

const (
	FinalClipNone FinalClipMode = iota
	FinalClipRemainder
	FinalClipFull
	FinalClipExtend
)

// ParseFinalClipMode parses a CLI flag value. The empty string is accepted as
// "none"; matching is case-sensitive. On an unknown value it returns an error
// and the zero mode.
func ParseFinalClipMode(s string) (FinalClipMode, error) {
	switch s {
	case "none", "":
		return FinalClipNone, nil
	case "remainder":
		return FinalClipRemainder, nil
	case "full":
		return FinalClipFull, nil
	case "extend":
		return FinalClipExtend, nil
	default:
		return 0, fmt.Errorf("invalid final-clip mode %q (want one of: none, remainder, full, extend)", s)
	}
}

// roundTo rounds x to `precision` decimal places, in the manner of numpy.round.
// Pass precision < 0 to skip rounding.
//
// NOTE(review): math.Round breaks exact ties away from zero, whereas
// numpy.round breaks them to even; the two only disagree when x*10^precision
// lands exactly on .5 — confirm whether that matters for callers.
func roundTo(x float64, precision int) float64 {
	if precision < 0 {
		return x
	}
	scale := math.Pow(10, float64(precision))
	return math.Round(x*scale) / scale
}

// GenerateClipTimes ports opensoundscape.utils.generate_clip_times_df.
//
// Args mirror the Python signature: clipDuration > 0, clipOverlap in [0, clipDuration),
// fullDuration > 0. roundingPrecision defaults to 10 in OPSO; pass -1 to skip rounding.
// An error is returned when any of those ranges is violated or when finalClip
// is not one of the declared modes.
//
// Result is the list of (start, end) windows for one audio file, with duplicates
// removed (which can happen under FinalClipFull when the shifted final clip
// coincides with the previous one).
func GenerateClipTimes(fullDuration, clipDuration, clipOverlap float64, finalClip FinalClipMode, roundingPrecision int) ([]ClipWindow, error) {
	if clipDuration <= 0 {
		return nil, fmt.Errorf("clipDuration must be > 0, got %v", clipDuration)
	}
	if clipOverlap < 0 || clipOverlap >= clipDuration {
		return nil, fmt.Errorf("clipOverlap must be in [0, clipDuration), got %v with clipDuration=%v", clipOverlap, clipDuration)
	}
	if fullDuration <= 0 {
		return nil, fmt.Errorf("fullDuration must be > 0, got %v", fullDuration)
	}

	starts, ends := buildClipStartsEnds(fullDuration, clipDuration, clipOverlap, roundingPrecision)

	var windows []ClipWindow
	switch finalClip {
	case FinalClipNone:
		windows = clipWindowsNone(starts, ends, fullDuration)
	case FinalClipRemainder:
		windows = clipWindowsRemainder(starts, ends, fullDuration)
	case FinalClipFull:
		windows = clipWindowsFull(starts, ends, fullDuration)
	case FinalClipExtend:
		windows = clipWindowsExtend(starts, ends)
	default:
		return nil, fmt.Errorf("invalid FinalClipMode %d", finalClip)
	}
	return dedupClips(windows), nil
}

// buildClipStartsEnds generates the parallel start and end arrays for clips.
// Starts are 0, increment, 2*increment, ... while strictly below fullDuration,
// each rounded with roundTo; every end is its rounded start plus clipDuration.
func buildClipStartsEnds(fullDuration, clipDuration, clipOverlap float64, roundingPrecision int) ([]float64, []float64) {
	increment := clipDuration - clipOverlap

	var starts []float64
	// Each start is computed as i*increment rather than by repeated addition:
	// accumulating `s += increment` drifts (ten additions of 0.1 give
	// 0.9999999999999999, not 1), which used to emit a spurious extra start
	// sitting on fullDuration.
	for i, s := 0, 0.0; s < fullDuration; i, s = i+1, float64(i+1)*increment {
		starts = append(starts, roundTo(s, roundingPrecision))
	}
	if len(starts) == 0 {
		// Only reachable when called with a non-positive (or NaN) fullDuration.
		starts = []float64{0}
	}

	ends := make([]float64, len(starts))
	for i, s := range starts {
		ends[i] = s + clipDuration
	}
	return starts, ends
}

// clipWindowsNone drops any window whose end exceeds fullDuration.
func clipWindowsNone(starts, ends []float64, fullDuration float64) []ClipWindow {
	out := make([]ClipWindow, 0, len(starts))
	for i := range starts {
		if ends[i] <= fullDuration {
			out = append(out, ClipWindow{Start: starts[i], End: ends[i]})
		}
	}
	return out
}

// clipWindowsRemainder trims ends beyond fullDuration down to fullDuration.
func clipWindowsRemainder(starts, ends []float64, fullDuration float64) []ClipWindow {
	out := make([]ClipWindow, 0, len(starts))
	for i := range starts {
		e := ends[i]
		if e > fullDuration {
			e = fullDuration
		}
		out = append(out, ClipWindow{Start: starts[i], End: e})
	}
	return out
}

// clipWindowsFull shifts windows whose end exceeds fullDuration back so end == fullDuration.
// A start that would become negative is clamped to 0.
func clipWindowsFull(starts, ends []float64, fullDuration float64) []ClipWindow {
	out := make([]ClipWindow, 0, len(starts))
	for i := range starts {
		s, e := starts[i], ends[i]
		if e > fullDuration {
			s -= e - fullDuration
			e = fullDuration
			if s < 0 {
				s = 0
			}
		}
		out = append(out, ClipWindow{Start: s, End: e})
	}
	return out
}

// clipWindowsExtend keeps ends as-is, even past fullDuration.
func clipWindowsExtend(starts, ends []float64) []ClipWindow {
	out := make([]ClipWindow, 0, len(starts))
	for i := range starts {
		out = append(out, ClipWindow{Start: starts[i], End: ends[i]})
	}
	return out
}

// dedupClips removes every repeated window (not only adjacent repeats),
// keeping the first occurrence and preserving order. Windows are compared by
// exact Start/End equality. Stands in for the pandas
// DataFrame.drop_duplicates() call at the end of OPSO's generate_clip_times_df.
func dedupClips(in []ClipWindow) []ClipWindow {
	if len(in) <= 1 {
		return in
	}
	seen := make(map[ClipWindow]bool, len(in))
	out := make([]ClipWindow, 0, len(in))
	for _, c := range in {
		if !seen[c] {
			seen[c] = true
			out = append(out, c)
		}
	}
	return out
}
a := &utils.Label{Filter: "kiwi.txt", Species: "Kiwi"}b := &utils.Label{Filter: "tomtit.txt", Species: "Tomtit"}c := &utils.Label{Filter: "kiwi.txt", Species: "Kiwi2"}labels := []*utils.Label{a, b, c}
a := &datafile.Label{Filter: "kiwi.txt", Species: "Kiwi"}b := &datafile.Label{Filter: "tomtit.txt", Species: "Tomtit"}c := &datafile.Label{Filter: "kiwi.txt", Species: "Kiwi2"}labels := []*datafile.Label{a, b, c}
trackMeta(&utils.DataMeta{Operator: "alice", Reviewer: ""}, ops, revs)trackMeta(&utils.DataMeta{Operator: "", Reviewer: "bob"}, ops, revs)trackMeta(&utils.DataMeta{Operator: "alice", Reviewer: "bob"}, ops, revs)
trackMeta(&datafile.DataMeta{Operator: "alice", Reviewer: ""}, ops, revs)trackMeta(&datafile.DataMeta{Operator: "", Reviewer: "bob"}, ops, revs)trackMeta(&datafile.DataMeta{Operator: "alice", Reviewer: "bob"}, ops, revs)
df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "Manual", Duration: 60, Reviewer: "AI"},Segments: []*utils.Segment{
df := &datafile.DataFile{Meta: &datafile.DataMeta{Operator: "Manual", Duration: 60, Reviewer: "AI"},Segments: []*datafile.Segment{
func findSegmentsByTimeRange(segments []*utils.Segment, startTime, endTime float64) []*utils.Segment {var matches []*utils.Segment
func findSegmentsByTimeRange(segments []*datafile.Segment, startTime, endTime float64) []*datafile.Segment {var matches []*datafile.Segment
func findMatchingLabels(segment *utils.Segment, species, callType, filter string) ([]*utils.Label, string) {var matches []*utils.Label
func findMatchingLabels(segment *datafile.Segment, species, callType, filter string) ([]*datafile.Label, string) {var matches []*datafile.Label
func removeLabelFromSegment(segment *utils.Segment, toRemove []*utils.Label) {removeSet := make(map[*utils.Label]bool)
func removeLabelFromSegment(segment *datafile.Segment, toRemove []*datafile.Label) {removeSet := make(map[*datafile.Label]bool)
func resolveTargetSegment(dataFile *utils.DataFile, input CallsRemoveInput, output *CallsRemoveOutput) (*utils.Segment, error) {
func resolveTargetSegment(dataFile *datafile.DataFile, input CallsRemoveInput, output *CallsRemoveOutput) (*datafile.Segment, error) {
func propagateTargets(df *utils.DataFile, sources []sourceRef, input CallsPropagateInput, output *CallsPropagateOutput) {
func propagateTargets(df *datafile.DataFile, sources []sourceRef, input CallsPropagateInput, output *CallsPropagateOutput) {
func applyPropagation(toLabel *utils.Label, species, callType string, tSeg *utils.Segment, output *CallsPropagateOutput) {
func applyPropagation(toLabel *datafile.Label, species, callType string, tSeg *datafile.Segment, output *CallsPropagateOutput) {
func applyLabelChanges(label *utils.Label, dataFile *utils.DataFile, input CallsModifyInput, newSpecies, newCallType string, output *CallsModifyOutput) error {
func applyLabelChanges(label *datafile.Label, dataFile *datafile.DataFile, input CallsModifyInput, newSpecies, newCallType string, output *CallsModifyOutput) error {
// convertAviaNZSegment converts an AviaNZSegment to utils.Segmentfunc convertAviaNZSegment(seg AviaNZSegment, filter string) *utils.Segment {
// convertAviaNZSegment converts an AviaNZSegment to datafile.Segmentfunc convertAviaNZSegment(seg AviaNZSegment, filter string) *datafile.Segment {
kiwiLabel := &utils.Label{Filter: "kiwi.txt", Species: "Kiwi", CallType: "song", Certainty: 100}tomtitLabel := &utils.Label{Filter: "tomtit.txt", Species: "Tomtit", Certainty: 80}
kiwiLabel := &datafile.Label{Filter: "kiwi.txt", Species: "Kiwi", CallType: "song", Certainty: 100}tomtitLabel := &datafile.Label{Filter: "tomtit.txt", Species: "Tomtit", Certainty: 80}
segments := []*utils.Segment{makeSeg([]*utils.Label{kiwiLabel}),makeSeg([]*utils.Label{tomtitLabel}),makeSeg([]*utils.Label{kiwiLabel, tomtitLabel}),makeSeg([]*utils.Label{}),
segments := []*datafile.Segment{makeSeg([]*datafile.Label{kiwiLabel}),makeSeg([]*datafile.Label{tomtitLabel}),makeSeg([]*datafile.Label{kiwiLabel, tomtitLabel}),makeSeg([]*datafile.Label{}),
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{Meta: &utils.DataMeta{Duration: 20},Segments: []*utils.Segment{
writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{Meta: &datafile.DataMeta{Duration: 20},Segments: []*datafile.Segment{
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{Meta: &utils.DataMeta{Duration: 15},Segments: []*utils.Segment{
writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{Meta: &datafile.DataMeta{Duration: 15},Segments: []*datafile.Segment{
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{Meta: &utils.DataMeta{Duration: 10},Segments: []*utils.Segment{
writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{Meta: &datafile.DataMeta{Duration: 10},Segments: []*datafile.Segment{
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{Meta: &utils.DataMeta{Duration: 15},Segments: []*utils.Segment{
writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{Meta: &datafile.DataMeta{Duration: 15},Segments: []*datafile.Segment{
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{Meta: &utils.DataMeta{Duration: 10},Segments: []*utils.Segment{
writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{Meta: &datafile.DataMeta{Duration: 10},Segments: []*datafile.Segment{
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{Meta: &utils.DataMeta{Duration: 10},Segments: []*utils.Segment{
writeDataFile(t, dir, "rec.wav.data", &datafile.DataFile{Meta: &datafile.DataMeta{Duration: 10},Segments: []*datafile.Segment{
writeDataFile(t, dir, "a.wav.data", &utils.DataFile{Meta: &utils.DataMeta{Duration: 5},Segments: []*utils.Segment{
writeDataFile(t, dir, "a.wav.data", &datafile.DataFile{Meta: &datafile.DataMeta{Duration: 5},Segments: []*datafile.Segment{
writeDataFile(t, dir, "a.wav.data", &utils.DataFile{Meta: &utils.DataMeta{Duration: 10},Segments: []*utils.Segment{
writeDataFile(t, dir, "a.wav.data", &datafile.DataFile{Meta: &datafile.DataMeta{Duration: 10},Segments: []*datafile.Segment{
writeDataFile(t, dir, "b.wav.data", &utils.DataFile{Meta: &utils.DataMeta{Duration: 5},Segments: []*utils.Segment{
writeDataFile(t, dir, "b.wav.data", &datafile.DataFile{Meta: &datafile.DataMeta{Duration: 5},Segments: []*datafile.Segment{
func validateClipLabelsInput(input CallsClipLabelsInput) (utils.FinalClipMode, error) {finalClipMode, err := utils.ParseFinalClipMode(input.FinalClip)
func validateClipLabelsInput(input CallsClipLabelsInput) (FinalClipMode, error) {finalClipMode, err := ParseFinalClipMode(input.FinalClip)
func collectSpeciesFromDataFile(path, filter string) (*utils.DataFile, map[string]bool, error) {df, err := utils.ParseDataFile(path)
func collectSpeciesFromDataFile(path, filter string) (*datafile.DataFile, map[string]bool, error) {df, err := datafile.ParseDataFile(path)
func resolveLabel(lbl *utils.Label, seg *utils.Segment, filter string, mf mapping.File, classIdx map[string]int) (resolvedSeg, bool, bool) {
func resolveLabel(lbl *datafile.Label, seg *datafile.Segment, filter string, mf mapping.File, classIdx map[string]int) (resolvedSeg, bool, bool) {
func labelClipWindows(windows []utils.ClipWindow, segs []resolvedSeg, rel string, classes []string, minLabelOverlap float64, out *CallsClipLabelsOutput) []clipLabelsRow {
func labelClipWindows(windows []ClipWindow, segs []resolvedSeg, rel string, classes []string, minLabelOverlap float64, out *CallsClipLabelsOutput) []clipLabelsRow {
func classifyClip(w utils.ClipWindow, segs []resolvedSeg, minLabelOverlap float64, nClasses int) (clipDisposition, []bool) {
func classifyClip(w ClipWindow, segs []resolvedSeg, minLabelOverlap float64, nClasses int) (clipDisposition, []bool) {
func filterSegments(segments []*utils.Segment, filter, speciesName, callType string, certainty int) []*utils.Segment {var matching []*utils.Segment
func filterSegments(segments []*datafile.Segment, filter, speciesName, callType string, certainty int) []*datafile.Segment {var matching []*datafile.Segment
func processSegments(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
func processSegments(segments []*datafile.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
func processSegmentsParallel(segments []*utils.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
func processSegmentsParallel(segments []*datafile.Segment, dataPath string, samples []float64, sampleRate int, outputDir, prefix, basename string, imgSize int, color bool) ([]string, []string) {
df1 := &utils.DataFile{FilePath: "/test/alpha.data", Segments: []*utils.Segment{{}}}df2 := &utils.DataFile{FilePath: "/test/beta.data", Segments: []*utils.Segment{{}}}
df1 := &datafile.DataFile{FilePath: "/test/alpha.data", Segments: []*datafile.Segment{{}}}df2 := &datafile.DataFile{FilePath: "/test/beta.data", Segments: []*datafile.Segment{{}}}
df1 := &utils.DataFile{FilePath: "/test/alpha.data", Segments: []*utils.Segment{{}}}segs := [][]*utils.Segment{df1.Segments}
df1 := &datafile.DataFile{FilePath: "/test/alpha.data", Segments: []*datafile.Segment{{}}}segs := [][]*datafile.Segment{df1.Segments}
df1 := &utils.DataFile{FilePath: "/test/alpha.data", Segments: []*utils.Segment{{}}}segs := [][]*utils.Segment{df1.Segments}
df1 := &datafile.DataFile{FilePath: "/test/alpha.data", Segments: []*datafile.Segment{{}}}segs := [][]*datafile.Segment{df1.Segments}
Meta: &utils.DataMeta{},Segments: []*utils.Segment{{Labels: []*utils.Label{{Species: "Kiwi", Filter: "f", Bookmark: true}}},{Labels: []*utils.Label{{Species: "Tomtit", Filter: "f"}}},{Labels: []*utils.Label{{Species: "Roroa", Filter: "f", Bookmark: true}}},
Meta: &datafile.DataMeta{},Segments: []*datafile.Segment{{Labels: []*datafile.Label{{Species: "Kiwi", Filter: "f", Bookmark: true}}},{Labels: []*datafile.Label{{Species: "Tomtit", Filter: "f"}}},{Labels: []*datafile.Label{{Species: "Roroa", Filter: "f", Bookmark: true}}},
Meta: &utils.DataMeta{},Segments: []*utils.Segment{{Labels: []*utils.Label{{Species: "Kiwi", Filter: "f", Certainty: 70}}},
Meta: &datafile.DataMeta{},Segments: []*datafile.Segment{{Labels: []*datafile.Label{{Species: "Kiwi", Filter: "f", Certainty: 70}}},
state6 := NewClassifyState(ClassifyConfig{Filter: "model-1.0", Species: "Kiwi", Certainty: -1}, []*utils.DataFile{df3})
state6 := NewClassifyState(ClassifyConfig{Filter: "model-1.0", Species: "Kiwi", Certainty: -1}, []*datafile.DataFile{df3})
df1 := &utils.DataFile{FilePath: "/test/f1.data", Segments: makeSegs(6)}df2 := &utils.DataFile{FilePath: "/test/f2.data", Segments: makeSegs(4)}kept := []*utils.DataFile{df1, df2}cached := [][]*utils.Segment{df1.Segments, df2.Segments}
df1 := &datafile.DataFile{FilePath: "/test/f1.data", Segments: makeSegs(6)}df2 := &datafile.DataFile{FilePath: "/test/f2.data", Segments: makeSegs(4)}kept := []*datafile.DataFile{df1, df2}cached := [][]*datafile.Segment{df1.Segments, df2.Segments}
state1 := NewClassifyState(ClassifyConfig{Species: "Kiwi", CallType: utils.CallTypeNone, Certainty: -1}, []*utils.DataFile{df})
state1 := NewClassifyState(ClassifyConfig{Species: "Kiwi", CallType: datafile.CallTypeNone, Certainty: -1}, []*datafile.DataFile{df})
DataFiles []*utils.DataFilefilteredSegs [][]*utils.Segment // cached at load time, parallel to DataFilestotalSegs int // pre-computed total segment count
DataFiles []*datafile.DataFilefilteredSegs [][]*datafile.Segment // cached at load time, parallel to DataFilestotalSegs int // pre-computed total segment count
func filterDataFiles(dataFiles []*utils.DataFile, config ClassifyConfig) ([]*utils.DataFile, [][]*utils.Segment, int) {var kept []*utils.DataFilevar cachedSegs [][]*utils.Segment
func filterDataFiles(dataFiles []*datafile.DataFile, config ClassifyConfig) ([]*datafile.DataFile, [][]*datafile.Segment, int) {var kept []*datafile.DataFilevar cachedSegs [][]*datafile.Segment
func buildClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile, filteredSegs [][]*utils.Segment, timeFiltered int) (*ClassifyState, error) {
func buildClassifyState(config ClassifyConfig, dataFiles []*datafile.DataFile, filteredSegs [][]*datafile.Segment, timeFiltered int) (*ClassifyState, error) {
func applySampling(kept []*utils.DataFile, cachedSegs [][]*utils.Segment, sample int, rng *rand.Rand) ([]*utils.DataFile, [][]*utils.Segment) {
func applySampling(kept []*datafile.DataFile, cachedSegs [][]*datafile.Segment, sample int, rng *rand.Rand) ([]*datafile.DataFile, [][]*datafile.Segment) {
df := &utils.DataFile{Meta: &utils.DataMeta{Operator: "BirdNET", Duration: 10, Reviewer: "AI"},Segments: []*utils.Segment{
df := &datafile.DataFile{Meta: &datafile.DataMeta{Operator: "BirdNET", Duration: 10, Reviewer: "AI"},Segments: []*datafile.Segment{
func addLabelToSegment(segment *utils.Segment, species, callType string, input CallsAddInput, output *CallsAddOutput) (CallsAddOutput, error) {
func addLabelToSegment(segment *datafile.Segment, species, callType string, input CallsAddInput, output *CallsAddOutput) (CallsAddOutput, error) {
func createNewSegment(dataFile *utils.DataFile, species, callType string, input CallsAddInput, output *CallsAddOutput) (CallsAddOutput, error) {newSeg := &utils.Segment{
func createNewSegment(dataFile *datafile.DataFile, species, callType string, input CallsAddInput, output *CallsAddOutput) (CallsAddOutput, error) {newSeg := &datafile.Segment{
package datafile

import (
	"os"
	"path/filepath"
	"sort"
	"testing"
)

// writeFindFixture creates a file at path holding contents and aborts the
// calling test if the write fails.
func writeFindFixture(t *testing.T, path, contents string) {
	t.Helper()
	if err := os.WriteFile(path, []byte(contents), 0644); err != nil {
		t.Fatal(err)
	}
}

// TestFindDataFiles_Basic checks that every .data file in a directory is
// returned and that files with other extensions are left out.
func TestFindDataFiles_Basic(t *testing.T) {
	root := t.TempDir()

	wantNames := []string{"a.data", "b.data", "c.data"}
	for _, name := range wantNames {
		writeFindFixture(t, filepath.Join(root, name), "[]")
	}
	// A non-.data file that must not show up in the results.
	writeFindFixture(t, filepath.Join(root, "notes.txt"), "ignore")

	found, err := FindDataFiles(root)
	if err != nil {
		t.Fatal(err)
	}
	sort.Strings(found)

	if n := len(found); n != 3 {
		t.Fatalf("expected 3 files, got %d: %v", n, found)
	}
	for i, name := range wantNames {
		if want := filepath.Join(root, name); found[i] != want {
			t.Errorf("file %d: got %q, want %q", i, found[i], want)
		}
	}
}

// TestFindDataFiles_SkipsHidden checks that a dot-prefixed .data file is not
// returned.
func TestFindDataFiles_SkipsHidden(t *testing.T) {
	root := t.TempDir()
	writeFindFixture(t, filepath.Join(root, "visible.data"), "[]")
	writeFindFixture(t, filepath.Join(root, ".hidden.data"), "[]")

	found, err := FindDataFiles(root)
	if err != nil {
		t.Fatal(err)
	}
	if n := len(found); n != 1 {
		t.Fatalf("expected 1 file (hidden skipped), got %d: %v", n, found)
	}
	if base := filepath.Base(found[0]); base != "visible.data" {
		t.Errorf("got %q, want visible.data", found[0])
	}
}

// TestFindDataFiles_NonRecursive checks that .data files inside a
// subdirectory are not returned.
func TestFindDataFiles_NonRecursive(t *testing.T) {
	root := t.TempDir()
	writeFindFixture(t, filepath.Join(root, "root.data"), "[]")

	nested := filepath.Join(root, "subdir")
	if err := os.Mkdir(nested, 0755); err != nil {
		t.Fatal(err)
	}
	writeFindFixture(t, filepath.Join(nested, "nested.data"), "[]")

	found, err := FindDataFiles(root)
	if err != nil {
		t.Fatal(err)
	}
	if n := len(found); n != 1 {
		t.Fatalf("expected 1 file (non-recursive), got %d: %v", n, found)
	}
	if base := filepath.Base(found[0]); base != "root.data" {
		t.Errorf("got %q, want root.data", found[0])
	}
}

// TestFindDataFiles_EmptyDir checks that an empty directory yields no files
// and no error.
func TestFindDataFiles_EmptyDir(t *testing.T) {
	found, err := FindDataFiles(t.TempDir())
	if err != nil {
		t.Fatal(err)
	}
	if n := len(found); n != 0 {
		t.Errorf("expected 0 files, got %d", n)
	}
}

// TestFindDataFiles_NonexistentDir checks that a missing directory is
// reported as an error.
func TestFindDataFiles_NonexistentDir(t *testing.T) {
	if _, err := FindDataFiles("/nonexistent/path/12345"); err == nil {
		t.Error("expected error for nonexistent directory")
	}
}

// TestFindDataFiles_NoDataFiles checks that a directory holding only
// non-.data files yields an empty result.
func TestFindDataFiles_NoDataFiles(t *testing.T) {
	root := t.TempDir()
	writeFindFixture(t, filepath.Join(root, "readme.txt"), "hello")

	found, err := FindDataFiles(root)
	if err != nil {
		t.Fatal(err)
	}
	if n := len(found); n != 0 {
		t.Errorf("expected 0 files, got %d", n)
	}
}
package datafile

import (
	"os"
	"path/filepath"
	"testing"
)

// writeTempDataFile writes content to a fresh .data file inside a
// per-test temporary directory and returns its path. The directory is
// removed automatically when the test finishes.
func writeTempDataFile(t *testing.T, content string) string {
	t.Helper()
	path := filepath.Join(t.TempDir(), "test.data")
	if err := os.WriteFile(path, []byte(content), 0644); err != nil {
		t.Fatal(err)
	}
	return path
}

// TestDataFileParse checks metadata, segment and label extraction from a
// hand-written .data file.
func TestDataFileParse(t *testing.T) {
	content := `[
{"Operator": "Auto", "Reviewer": null, "Duration": 60.0},
[10.0, 20.0, 0, 0, [{"species": "Kiwi", "certainty": 70, "filter": "test-filter"}]],
[30.0, 40.0, 1000, 5000, [{"species": "Morepork", "certainty": 80, "filter": "M"}]]
]`
	path := writeTempDataFile(t, content)

	df, err := ParseDataFile(path)
	if err != nil {
		t.Fatal(err)
	}

	// Check metadata
	if df.Meta.Operator != "Auto" {
		t.Errorf("expected Operator=Auto, got %s", df.Meta.Operator)
	}
	if df.Meta.Duration != 60.0 {
		t.Errorf("expected Duration=60.0, got %f", df.Meta.Duration)
	}

	// Check segments. Fatal, because the assertions below index into the slice.
	if len(df.Segments) != 2 {
		t.Fatalf("expected 2 segments, got %d", len(df.Segments))
	}

	// Check first segment (sorted by start time)
	first := df.Segments[0]
	if first.StartTime != 10.0 {
		t.Errorf("expected StartTime=10.0, got %f", first.StartTime)
	}
	if first.EndTime != 20.0 {
		t.Errorf("expected EndTime=20.0, got %f", first.EndTime)
	}

	// Check labels. Fatal for the same reason as above.
	if len(first.Labels) != 1 {
		t.Fatalf("expected 1 label, got %d", len(first.Labels))
	}
	if first.Labels[0].Species != "Kiwi" {
		t.Errorf("expected Species=Kiwi, got %s", first.Labels[0].Species)
	}
	if first.Labels[0].Certainty != 70 {
		t.Errorf("expected Certainty=70, got %d", first.Labels[0].Certainty)
	}
}

// TestDataFileWrite checks that a DataFile built in memory survives a
// write/parse round trip.
func TestDataFileWrite(t *testing.T) {
	df := &DataFile{
		FilePath: "",
		Meta: &DataMeta{
			Operator: "Test",
			Reviewer: "David",
			Duration: 120.0,
		},
		Segments: []*Segment{
			{
				StartTime: 5.0,
				EndTime:   15.0,
				FreqLow:   0,
				FreqHigh:  0,
				Labels: []*Label{
					{Species: "Kiwi", Certainty: 100, Filter: "test"},
				},
			},
		},
	}

	path := filepath.Join(t.TempDir(), "test.data")
	if err := df.Write(path); err != nil {
		t.Fatal(err)
	}

	// Re-parse and verify
	df2, err := ParseDataFile(path)
	if err != nil {
		t.Fatal(err)
	}
	if df2.Meta.Reviewer != "David" {
		t.Errorf("expected Reviewer=David, got %s", df2.Meta.Reviewer)
	}
	if len(df2.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df2.Segments))
	}
	if len(df2.Segments[0].Labels) != 1 {
		t.Fatalf("expected 1 label, got %d", len(df2.Segments[0].Labels))
	}
	if df2.Segments[0].Labels[0].Species != "Kiwi" {
		t.Errorf("expected Species=Kiwi, got %s", df2.Segments[0].Labels[0].Species)
	}
}

// TestHasFilterLabel checks filter-name matching, including the empty
// filter which matches every segment.
func TestHasFilterLabel(t *testing.T) {
	seg := &Segment{
		Labels: []*Label{
			{Species: "Kiwi", Filter: "test-filter"},
			{Species: "Morepork", Filter: "M"},
		},
	}

	if !seg.HasFilterLabel("test-filter") {
		t.Error("expected HasFilterLabel(test-filter)=true")
	}
	if !seg.HasFilterLabel("M") {
		t.Error("expected HasFilterLabel(M)=true")
	}
	if seg.HasFilterLabel("other") {
		t.Error("expected HasFilterLabel(other)=false")
	}
	if !seg.HasFilterLabel("") {
		t.Error("expected HasFilterLabel('')=true (no filter)")
	}
}

// TestGetFilterLabels checks that labels are selected by filter name and
// that the empty filter returns all of them.
func TestGetFilterLabels(t *testing.T) {
	seg := &Segment{
		Labels: []*Label{
			{Species: "Kiwi", Filter: "test-filter", Certainty: 70},
			{Species: "Morepork", Filter: "M", Certainty: 80},
			{Species: "Don't Know", Filter: "test-filter", Certainty: 0},
		},
	}

	labels := seg.GetFilterLabels("test-filter")
	if len(labels) != 2 {
		t.Errorf("expected 2 labels, got %d", len(labels))
	}

	labels = seg.GetFilterLabels("")
	if len(labels) != 3 {
		t.Errorf("expected 3 labels (no filter), got %d", len(labels))
	}
}

// TestLabelComment checks that a label comment is parsed from disk and that
// an edited comment survives a write/parse round trip.
func TestLabelComment(t *testing.T) {
	// Test parsing comment from .data file
	content := `[
{"Operator": "Test", "Duration": 60.0},
[10.0, 20.0, 0, 0, [{"species": "Kiwi", "certainty": 100, "filter": "M", "comment": "Good call"}]]
]`
	path := writeTempDataFile(t, content)

	df, err := ParseDataFile(path)
	if err != nil {
		t.Fatal(err)
	}
	if len(df.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df.Segments))
	}
	if len(df.Segments[0].Labels) != 1 {
		t.Fatalf("expected 1 label, got %d", len(df.Segments[0].Labels))
	}
	if df.Segments[0].Labels[0].Comment != "Good call" {
		t.Errorf("expected Comment='Good call', got '%s'", df.Segments[0].Labels[0].Comment)
	}

	// Test writing comment
	df.Segments[0].Labels[0].Comment = "Updated comment"
	out := filepath.Join(t.TempDir(), "test2.data")
	if err := df.Write(out); err != nil {
		t.Fatal(err)
	}

	// Re-parse and verify
	df2, err := ParseDataFile(out)
	if err != nil {
		t.Fatal(err)
	}
	if len(df2.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df2.Segments))
	}
	if len(df2.Segments[0].Labels) != 1 {
		t.Fatalf("expected 1 label, got %d", len(df2.Segments[0].Labels))
	}
	if df2.Segments[0].Labels[0].Comment != "Updated comment" {
		t.Errorf("expected Comment='Updated comment', got '%s'", df2.Segments[0].Labels[0].Comment)
	}
}

// TestSkraakHashRoundTrip checks that skraak_hash in metadata is preserved
// through a write/parse cycle.
func TestSkraakHashRoundTrip(t *testing.T) {
	df := &DataFile{
		Meta: &DataMeta{
			Operator: "Test",
			Duration: 60.0,
			Extra: map[string]any{
				"skraak_hash": "abc123def456",
			},
		},
		Segments: []*Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*Label{
					{Species: "Kiwi", Certainty: 100, Filter: "M"},
				},
			},
		},
	}

	path := filepath.Join(t.TempDir(), "test.data")
	if err := df.Write(path); err != nil {
		t.Fatal(err)
	}

	df2, err := ParseDataFile(path)
	if err != nil {
		t.Fatal(err)
	}

	// Verify skraak_hash preserved
	if df2.Meta.Extra == nil {
		t.Fatal("expected Extra to be non-nil")
	}
	hash, ok := df2.Meta.Extra["skraak_hash"].(string)
	if !ok {
		t.Fatal("expected skraak_hash to be string")
	}
	if hash != "abc123def456" {
		t.Errorf("expected skraak_hash=abc123def456, got %s", hash)
	}
}

// TestSkraakLabelIDRoundTrip checks that skraak_label_id on a label is
// preserved through a write/parse cycle.
func TestSkraakLabelIDRoundTrip(t *testing.T) {
	df := &DataFile{
		Meta: &DataMeta{
			Operator: "Test",
			Duration: 60.0,
		},
		Segments: []*Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*Label{
					{
						Species:   "Kiwi",
						Certainty: 100,
						Filter:    "M",
						Extra: map[string]any{
							"skraak_label_id": "label_abc123",
						},
					},
				},
			},
		},
	}

	path := filepath.Join(t.TempDir(), "test.data")
	if err := df.Write(path); err != nil {
		t.Fatal(err)
	}

	df2, err := ParseDataFile(path)
	if err != nil {
		t.Fatal(err)
	}

	// Verify skraak_label_id preserved
	if len(df2.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df2.Segments))
	}
	if len(df2.Segments[0].Labels) != 1 {
		t.Fatalf("expected 1 label, got %d", len(df2.Segments[0].Labels))
	}
	label := df2.Segments[0].Labels[0]
	if label.Extra == nil {
		t.Fatal("expected label Extra to be non-nil")
	}
	labelID, ok := label.Extra["skraak_label_id"].(string)
	if !ok {
		t.Fatal("expected skraak_label_id to be string")
	}
	if labelID != "label_abc123" {
		t.Errorf("expected skraak_label_id=label_abc123, got %s", labelID)
	}
}

// TestSkraakFieldsBothPresent checks skraak_hash and skraak_label_id
// together, with two labels on one segment.
func TestSkraakFieldsBothPresent(t *testing.T) {
	df := &DataFile{
		Meta: &DataMeta{
			Operator: "Test",
			Duration: 60.0,
			Extra: map[string]any{
				"skraak_hash": "file_hash_xyz",
			},
		},
		Segments: []*Segment{
			{
				StartTime: 10.0,
				EndTime:   20.0,
				Labels: []*Label{
					{
						Species:   "Kiwi",
						Certainty: 100,
						Filter:    "M",
						Extra: map[string]any{
							"skraak_label_id": "label_id_1",
						},
					},
					{
						Species:   "Roroa",
						Certainty: 90,
						Filter:    "M",
						Extra: map[string]any{
							"skraak_label_id": "label_id_2",
						},
					},
				},
			},
		},
	}

	path := filepath.Join(t.TempDir(), "test.data")
	if err := df.Write(path); err != nil {
		t.Fatal(err)
	}

	df2, err := ParseDataFile(path)
	if err != nil {
		t.Fatal(err)
	}

	// Verify skraak_hash
	if df2.Meta.Extra["skraak_hash"] != "file_hash_xyz" {
		t.Errorf("expected skraak_hash=file_hash_xyz, got %v", df2.Meta.Extra["skraak_hash"])
	}

	// Verify both label IDs
	if len(df2.Segments) != 1 {
		t.Fatalf("expected 1 segment, got %d", len(df2.Segments))
	}
	if len(df2.Segments[0].Labels) != 2 {
		t.Fatalf("expected 2 labels, got %d", len(df2.Segments[0].Labels))
	}
	labelIDs := []string{"label_id_1", "label_id_2"}
	for i, label := range df2.Segments[0].Labels {
		if label.Extra["skraak_label_id"] != labelIDs[i] {
			t.Errorf("label %d: expected skraak_label_id=%s, got %v", i, labelIDs[i], label.Extra["skraak_label_id"])
		}
	}
}

// TestSegmentMatchesFilters exercises every combination of filter, species,
// calltype and certainty criteria against a two-label segment.
func TestSegmentMatchesFilters(t *testing.T) {
	seg := &Segment{
		Labels: []*Label{
			{Species: "Kiwi", Filter: "model-1.0", CallType: "Duet", Certainty: 70},
			{Species: "Morepork", Filter: "model-2.0", CallType: "", Certainty: 100},
		},
	}

	tests := []struct {
		name      string
		filter    string
		species   string
		callType  string
		certainty int
		want      bool
	}{
		{"no filters", "", "", "", -1, true},
		{"filter only match", "model-1.0", "", "", -1, true},
		{"filter only no match", "model-3.0", "", "", -1, false},
		{"species only match", "", "Kiwi", "", -1, true},
		{"species only no match", "", "Tomtit", "", -1, false},
		{"calltype only match", "", "", "Duet", -1, true},
		{"calltype only no match", "", "", "Male", -1, false},
		{"certainty match", "", "", "", 70, true},
		{"certainty no match", "", "", "", 80, false},
		{"certainty 100 match", "", "", "", 100, true},
		{"filter+species match", "model-1.0", "Kiwi", "", -1, true},
		{"filter+species+calltype match", "model-1.0", "Kiwi", "Duet", -1, true},
		{"filter+species+calltype+certainty match", "model-1.0", "Kiwi", "Duet", 70, true},
		{"filter+species+calltype certainty miss", "model-1.0", "Kiwi", "Duet", 100, false},
		{"filter match species miss", "model-1.0", "Morepork", "", -1, false},
		{"all miss", "model-3.0", "Tomtit", "Male", -1, false},
		{"CallTypeNone matches empty calltype", "model-2.0", "Morepork", CallTypeNone, -1, true},
		{"CallTypeNone skips non-empty calltype", "model-1.0", "Kiwi", CallTypeNone, -1, false},
		{"CallTypeNone + certainty match", "model-2.0", "Morepork", CallTypeNone, 100, true},
		{"CallTypeNone + certainty miss", "model-2.0", "Morepork", CallTypeNone, 70, false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := seg.SegmentMatchesFilters(tt.filter, tt.species, tt.callType, tt.certainty)
			if got != tt.want {
				t.Errorf("SegmentMatchesFilters(%q, %q, %q, %d) = %v, want %v",
					tt.filter, tt.species, tt.callType, tt.certainty, got, tt.want)
			}
		})
	}
}

// TestParseSpeciesCallType checks splitting of "Species+CallType" strings,
// including inputs with no "+" and with several.
func TestParseSpeciesCallType(t *testing.T) {
	tests := []struct {
		input    string
		species  string
		callType string
	}{
		{"", "", ""},
		{"Kiwi", "Kiwi", ""},
		{"Kiwi+Duet", "Kiwi", "Duet"},
		{"GSK+Female", "GSK", "Female"},
		{"Species+With+Multiple+Plus", "Species", "With+Multiple+Plus"},
		{"Kiwi+_", "Kiwi", "_"},
	}

	for _, tt := range tests {
		t.Run(tt.input, func(t *testing.T) {
			species, callType := ParseSpeciesCallType(tt.input)
			if species != tt.species || callType != tt.callType {
				t.Errorf("ParseSpeciesCallType(%q) = (%q, %q), want (%q, %q)",
					tt.input, species, callType, tt.species, tt.callType)
			}
		})
	}
}
package datafile

import (
	"encoding/json"
	"fmt"
	"maps"
	"os"
	"sort"
	"strings"

	"skraak/utils"
)

// DataFile represents an AviaNZ .data file.
type DataFile struct {
	Meta     *DataMeta
	Segments []*Segment
	FilePath string
}

// DataMeta contains metadata for a .data file.
type DataMeta struct {
	Operator string
	Reviewer string
	Duration float64
	Extra    map[string]any // preserve unknown fields
}

// Segment represents a detection segment.
type Segment struct {
	StartTime float64
	EndTime   float64
	FreqLow   float64
	FreqHigh  float64
	Labels    []*Label
}

// CallTypeNone is a sentinel value used in --species Species+_ to match
// only labels with an empty calltype.
const CallTypeNone = "_"

// Label represents a species label within a segment.
type Label struct {
	Species   string
	Certainty int
	Filter    string
	CallType  string
	Comment   string         // user comment (max 140 chars, ASCII only)
	Bookmark  bool           // user bookmark for navigation
	Extra     map[string]any // preserve unknown fields
}

// ParseDataFile reads and parses a .data file.
//
// The file must be a JSON array whose first element is the metadata object
// and whose remaining elements are segment arrays. Segments that cannot be
// decoded are skipped rather than failing the whole file. The returned
// segments are ordered by start time; segments sharing a start time keep
// their order from the file.
func ParseDataFile(path string) (*DataFile, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	var raw []json.RawMessage
	if err := json.Unmarshal(data, &raw); err != nil {
		return nil, fmt.Errorf("parse JSON: %w", err)
	}
	if len(raw) == 0 {
		return nil, fmt.Errorf("empty .data file")
	}

	df := &DataFile{
		FilePath: path,
		Segments: make([]*Segment, 0, len(raw)-1),
	}

	// Parse metadata (first element)
	df.Meta = parseMeta(raw[0])

	// Parse segments
	for _, rawSeg := range raw[1:] {
		seg, err := parseSegment(rawSeg)
		if err != nil {
			continue // skip invalid segments (deliberate best-effort)
		}
		df.Segments = append(df.Segments, seg)
	}

	// Sort segments by start time. Stable, so ties keep file order.
	sort.SliceStable(df.Segments, func(i, j int) bool {
		return df.Segments[i].StartTime < df.Segments[j].StartTime
	})

	return df, nil
}

// parseMeta parses the metadata object.
//
// Known keys are moved into typed fields only when they hold the expected
// JSON type; every other key, including a known key with an unexpected type
// such as null, is kept in Extra. Extra is never nil in the result, even
// when raw is not a JSON object.
func parseMeta(raw json.RawMessage) *DataMeta {
	meta := &DataMeta{Extra: make(map[string]any)}

	var obj map[string]any
	if err := json.Unmarshal(raw, &obj); err != nil {
		return meta
	}

	if v, ok := obj["Operator"].(string); ok {
		meta.Operator = v
		delete(obj, "Operator")
	}
	if v, ok := obj["Reviewer"].(string); ok {
		meta.Reviewer = v
		delete(obj, "Reviewer")
	}
	if v, ok := obj["Duration"].(float64); ok {
		meta.Duration = v
		delete(obj, "Duration")
	}

	// Store remaining fields
	maps.Copy(meta.Extra, obj)
	return meta
}

// parseSegment parses a segment array of the form
// [start, end, freqLow, freqHigh, labels]. It returns an error when raw is
// not an array or has fewer than five elements. Numeric elements that fail
// to decode are left at zero, and a label list that fails to decode yields
// a segment without labels. Labels are ordered alphabetically by species,
// with ties keeping their order from the file.
func parseSegment(raw json.RawMessage) (*Segment, error) {
	var arr []json.RawMessage
	if err := json.Unmarshal(raw, &arr); err != nil {
		return nil, err
	}
	if len(arr) < 5 {
		return nil, fmt.Errorf("segment too short")
	}

	seg := &Segment{}

	// Parse time and frequency
	if v, err := parseFloat(arr[0]); err == nil {
		seg.StartTime = v
	}
	if v, err := parseFloat(arr[1]); err == nil {
		seg.EndTime = v
	}
	if v, err := parseFloat(arr[2]); err == nil {
		seg.FreqLow = v
	}
	if v, err := parseFloat(arr[3]); err == nil {
		seg.FreqHigh = v
	}

	// Parse labels
	var labelArr []json.RawMessage
	if err := json.Unmarshal(arr[4], &labelArr); err == nil {
		for _, labelRaw := range labelArr {
			if label := parseLabel(labelRaw); label != nil {
				seg.Labels = append(seg.Labels, label)
			}
		}
	}

	// Sort labels alphabetically by species. Stable, so ties keep file order.
	sort.SliceStable(seg.Labels, func(i, j int) bool {
		return seg.Labels[i].Species < seg.Labels[j].Species
	})

	return seg, nil
}

// parseLabel parses a label object. It returns nil when raw is not a JSON
// object. Known keys are moved into typed fields only when they hold the
// expected JSON type; everything else is kept in Extra.
func parseLabel(raw json.RawMessage) *Label {
	var obj map[string]any
	if err := json.Unmarshal(raw, &obj); err != nil {
		return nil
	}

	label := &Label{Extra: make(map[string]any)}

	if v, ok := obj["species"].(string); ok {
		label.Species = v
		delete(obj, "species")
	}
	if v, ok := obj["certainty"].(float64); ok {
		label.Certainty = int(v)
		delete(obj, "certainty")
	}
	if v, ok := obj["filter"].(string); ok {
		label.Filter = v
		delete(obj, "filter")
	}
	if v, ok := obj["calltype"].(string); ok {
		label.CallType = v
		delete(obj, "calltype")
	}
	if v, ok := obj["comment"].(string); ok {
		label.Comment = v
		delete(obj, "comment")
	}
	if v, ok := obj["bookmark"].(bool); ok {
		label.Bookmark = v
		delete(obj, "bookmark")
	}

	// Store remaining fields
	maps.Copy(label.Extra, obj)
	return label
}

// parseFloat extracts a float from JSON.
func parseFloat(raw json.RawMessage) (float64, error) {
	var v float64
	err := json.Unmarshal(raw, &v)
	return v, err
}

// Write serializes the DataFile as JSON and writes it to path.
//
// Extra fields are emitted first and typed fields are laid over them, so a
// typed field always wins over a same-named key in Extra. This matters for
// files where a known key had an unexpected type (for example
// "Reviewer": null): that key lives in Extra after parsing and must not
// clobber a value assigned to the typed field afterwards. A nil Meta is
// written as an empty metadata object.
func (df *DataFile) Write(path string) error {
	raw := make([]any, 0, len(df.Segments)+1)

	// Build metadata
	meta := make(map[string]any)
	if df.Meta != nil {
		maps.Copy(meta, df.Meta.Extra)
		if df.Meta.Operator != "" {
			meta["Operator"] = df.Meta.Operator
		}
		if df.Meta.Reviewer != "" {
			meta["Reviewer"] = df.Meta.Reviewer
		}
		if df.Meta.Duration > 0 {
			meta["Duration"] = df.Meta.Duration
		}
	}
	raw = append(raw, meta)

	// Build segments
	for _, seg := range df.Segments {
		labels := make([]any, 0, len(seg.Labels))
		for _, label := range seg.Labels {
			l := make(map[string]any, len(label.Extra)+6)
			maps.Copy(l, label.Extra)
			l["species"] = label.Species
			l["certainty"] = label.Certainty
			if label.Filter != "" {
				l["filter"] = label.Filter
			}
			if label.CallType != "" {
				l["calltype"] = label.CallType
			}
			if label.Comment != "" {
				l["comment"] = label.Comment
			}
			if label.Bookmark {
				l["bookmark"] = true
			}
			labels = append(labels, l)
		}

		segArr := []any{
			seg.StartTime,
			seg.EndTime,
			seg.FreqLow,
			seg.FreqHigh,
			labels,
		}
		raw = append(raw, segArr)
	}

	data, err := json.MarshalIndent(raw, "", " ")
	if err != nil {
		return err
	}
	return os.WriteFile(path, data, 0644)
}

// HasFilterLabel returns true if the segment has a label whose Filter equals
// filter. An empty filter matches every segment, including one without
// labels.
func (s *Segment) HasFilterLabel(filter string) bool {
	if filter == "" {
		return true
	}
	for _, label := range s.Labels {
		if label.Filter == filter {
			return true
		}
	}
	return false
}

// GetFilterLabels returns the labels whose Filter equals filter, or all
// labels when filter is empty.
func (s *Segment) GetFilterLabels(filter string) []*Label {
	var result []*Label
	for _, label := range s.Labels {
		if filter == "" || label.Filter == filter {
			result = append(result, label)
		}
	}
	return result
}

// SegmentMatchesFilters returns true if the segment has any label matching all filter criteria.
// All non-empty/non-negative parameters must match for a label to be considered a match.
// Use certainty=-1 to indicate no certainty filtering (since 0 is a valid certainty value).
func (s *Segment) SegmentMatchesFilters(filter, species, callType string, certainty int) bool {
	if filter == "" && species == "" && callType == "" && certainty < 0 {
		return true // No filters, match all
	}
	for _, label := range s.Labels {
		if labelMatchesFilters(label, filter, species, callType, certainty) {
			return true
		}
	}
	return false
}

// labelMatchesFilters checks if a single label matches all filter criteria.
// A callType of CallTypeNone matches only labels whose CallType is empty.
func labelMatchesFilters(label *Label, filter, species, callType string, certainty int) bool {
	if filter != "" && label.Filter != filter {
		return false
	}
	if species != "" && label.Species != species {
		return false
	}
	switch {
	case callType == CallTypeNone:
		if label.CallType != "" {
			return false
		}
	case callType != "" && label.CallType != callType:
		return false
	}
	if certainty >= 0 && label.Certainty != certainty {
		return false
	}
	return true
}

// ParseSpeciesCallType parses a species string with optional calltype into separate values.
// Format: "Species" or "Species+CallType" (e.g., "Kiwi" or "Kiwi+Duet").
// Only the first "+" splits; anything after it, including further "+"
// characters, is the calltype.
// Use "_" as the calltype to match only labels with no calltype (e.g., "Kiwi+_").
func ParseSpeciesCallType(label string) (species, callType string) {
	if label == "" {
		return "", ""
	}
	if before, after, ok := strings.Cut(label, "+"); ok {
		return before, after
	}
	return label, ""
}

// FindDataFiles finds all .data files in a folder, ignoring hidden files
// (starting with "."). The search does not descend into subdirectories.
func FindDataFiles(folder string) ([]string, error) {
	return utils.FindFiles(folder, utils.FindFilesOptions{
		Extension:  ".data",
		Recursive:  false,
		SkipHidden: true,
	})
}
package config

import (
	"os"
	"path/filepath"
	"testing"
)

// TestLoadConfig checks that LoadConfig reads ~/.skraak/config.json from the
// current user's home directory and decodes the classify section.
func TestLoadConfig(t *testing.T) {
	homeDir := t.TempDir()
	// os.UserHomeDir consults $HOME on Unix and %USERPROFILE% on Windows.
	// Override both so the test never reads the real user's config.
	t.Setenv("HOME", homeDir)
	t.Setenv("USERPROFILE", homeDir)

	configDir := filepath.Join(homeDir, ".skraak")
	if err := os.MkdirAll(configDir, 0755); err != nil {
		t.Fatalf("failed to create config dir: %v", err)
	}

	jsonContent := `{
"classify": {
"reviewer": "Test Reviewer",
"color": true
}
}`
	if err := os.WriteFile(filepath.Join(configDir, "config.json"), []byte(jsonContent), 0644); err != nil {
		t.Fatalf("failed to write config: %v", err)
	}

	cfg, path, err := LoadConfig()
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if cfg.Classify.Reviewer != "Test Reviewer" {
		t.Errorf("expected Test Reviewer, got %s", cfg.Classify.Reviewer)
	}
	if !cfg.Classify.Color {
		t.Error("expected color to be true")
	}
	if path == "" {
		t.Error("expected path to be returned")
	}
}
package config

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
)

// ~/.skraak/config.json schema (reference):
//
//	{
//	  "classify": {
//	    "reviewer": "string, required. Name stamped into .data file meta on any edit.",
//	    "color": "bool, optional. Colored spectrograms in the TUI. Default false.",
//	    "sixel": "bool, optional. Use sixel image protocol. Default false (Kitty).",
//	    "iterm": "bool, optional. Use iTerm inline-image protocol. Default false.",
//	    "img_dims": "int, optional. Spectrogram display size in pixels. 0 = default.",
//
//	    "bindings": {
//	      "<key>": "Species"          // e.g. "c": "comcha"
//	      "<key>": "Species+CallType" // e.g. "1": "Kiwi+Duet"
//	      // <key> is a single character. Reserved: ",", ".", "0", " " (space).
//	      // Pressing <key> labels the current segment (certainty 100, or 0 for
//	      // "Don't Know"), saves, and advances.
//	    },
//
//	    "secondary_bindings": {
//	      "<primary-key>": {
//	        "<key>": "CallType" // e.g. "a": "alarm"
//	        // <key> is a single character, same reserved-key rules as bindings.
//	        // Outer <primary-key> must also exist in "bindings".
//	      }
//	      // Optional. Invoked via Shift+<primary-key>: labels the species with
//	      // an empty calltype, does NOT advance, and waits for one follow-up
//	      // key looked up in this inner map. Match -> set calltype, save,
//	      // advance. Esc -> exit wait mode without advancing. Any other key ->
//	      // exit wait mode and handle the key normally.
//	      // Shift+<primary-key> on a primary without a secondary_bindings entry
//	      // falls back to normal primary behavior.
//	    }
//	  }
//	}
//
// Example:
//
//	{
//	  "classify": {
//	    "reviewer": "David",
//	    "color": true,
//	    "bindings": {
//	      "c": "comcha",
//	      "k": "kea1",
//	      "x": "Noise",
//	      "z": "Don't Know",
//	      "1": "Kiwi+Duet",
//	      "4": "Kiwi"
//	    },
//	    "secondary_bindings": {
//	      "c": { "a": "alarm", "s": "song", "n": "contact" }
//	    }
//	  }
//	}

// Config holds user-level defaults loaded from ~/.skraak/config.json.
// Per-subcommand sections live as named fields.
type Config struct {
	Classify ClassifyFileConfig `json:"classify"`
}

// ClassifyFileConfig holds defaults for `skraak calls classify`.
// Bindings maps a single-character key to "Species" or "Species+CallType".
type ClassifyFileConfig struct {
	Reviewer string            `json:"reviewer"`
	Color    bool              `json:"color"`
	Sixel    bool              `json:"sixel"`
	ITerm    bool              `json:"iterm"`
	ImgDims  int               `json:"img_dims"`
	Bindings map[string]string `json:"bindings"`
	// SecondaryBindings extends a primary binding with per-species calltype
	// choices. Outer key is the primary binding key; inner map is
	// single-char key -> calltype string. Invoked via Shift+primary-key.
	SecondaryBindings map[string]map[string]string `json:"secondary_bindings,omitempty"`
}

// ConfigPath returns the absolute path to ~/.skraak/config.json.
// It fails only when the current user's home directory cannot be resolved.
func ConfigPath() (string, error) {
	home, err := os.UserHomeDir()
	if err != nil {
		return "", fmt.Errorf("resolving home directory: %w", err)
	}
	return filepath.Join(home, ".skraak", "config.json"), nil
}

// LoadConfig reads ~/.skraak/config.json and returns the parsed config and the
// resolved path (useful for error messages).
//
// On any error the returned Config is the zero value. encoding/json can leave
// a struct half-filled when decoding fails partway through, and handing that
// back next to an error would let callers act on partial settings. The path
// is still returned whenever it could be resolved.
func LoadConfig() (Config, string, error) {
	path, err := ConfigPath()
	if err != nil {
		return Config{}, "", err
	}

	data, err := os.ReadFile(path)
	if err != nil {
		return Config{}, path, fmt.Errorf("reading %s: %w", path, err)
	}

	var cfg Config
	if err := json.Unmarshal(data, &cfg); err != nil {
		return Config{}, path, fmt.Errorf("parsing %s: %w", path, err)
	}
	return cfg, path, nil
}
func loadClassifyConfig() (utils.Config, string, []calls.KeyBinding, error) {cfg, cfgPath, err := utils.LoadConfig()
func loadClassifyConfig() (config.Config, string, []calls.KeyBinding, error) {cfg, cfgPath, err := config.LoadConfig()
func buildClassifyConfig(a classifyArgs, cfg utils.Config, bindings []calls.KeyBinding) (calls.ClassifyConfig, error) {speciesName, callType := utils.ParseSpeciesCallType(a.species)
func buildClassifyConfig(a classifyArgs, cfg config.Config, bindings []calls.KeyBinding) (calls.ClassifyConfig, error) {speciesName, callType := datafile.ParseSpeciesCallType(a.species)
cmd/ → tools, tools/calls, tools/import, tuitools/ → db, audio, wav, spectrogram, astro, mapping, utilstools/calls/ → db, audio, wav, spectrogram, mapping, utils (NO DB access in practice)tools/import/ → db, wav, astro, mapping, utils (defines own Mutator/Reader)tui/ → audio, wav, spectrogram, utils (NO db, NO cmd)db/ → wav (GainLevel only), utils (Placeholders only)
cmd/ → tools, tools/calls, tools/import, tui, configtools/calls/ → audio, wav, spectrogram, datafile, mapping, utils (filesystem only, NO db)tools/import/ → db, wav, astro, datafile, mapping, utils (defines own Mutator/Reader)tools/ → db, audio, wav, spectrogram, datafile, astro, mapping, utilstui/ → audio, wav, spectrogram, datafile, utils (NO db, NO cmd)db/ → wav (GainLevel alias only)
# cmd → tools, tools/calls, tools/import, tui, db, audio, wav, spectrogram, astro, mapping, utils# tools/calls → db, audio, wav, spectrogram, mapping, utils# tools/import → db, wav, astro, mapping, utils# tools → db, audio, wav, spectrogram, astro, mapping, utils# tui → audio, wav, spectrogram, utils# db → wav, utils# spectrogram → audio, wav# wav → audio, astro, utils# audio, astro, mapping, utils → (no skraak/* imports)
# cmd → tools, tools/calls, tools/import, tui, db, config, datafile, audio, wav, spectrogram, astro, mapping, utils# tools/calls → db, audio, wav, spectrogram, datafile, mapping, utils (filesystem only, no DB in practice)# tools/import → db, wav, astro, datafile, mapping, utils# tools → db, audio, wav, spectrogram, datafile, astro, mapping, utils# tui → audio, wav, spectrogram, datafile, utils (no db, no cmd)# db → (stdlib only after Placeholders inline; wav alias allowed)# spectrogram → audio, wav# wav → audio, astro, utils# datafile → utils (FindFiles only)# config → (stdlib only)# audio, astro, mapping, utils → (no skraak/* imports — true leaves)
config:files: ["**/config/*.go"]deny:- { pkg: "skraak/", desc: "config is a leaf package — no skraak/* imports" }datafile:files: ["**/datafile/*.go"]deny:- { pkg: "skraak/cmd", desc: "datafile must not import cmd" }- { pkg: "skraak/tools", desc: "datafile must not import tools" }- { pkg: "skraak/tui", desc: "datafile must not import tui" }- { pkg: "skraak/db", desc: "datafile must not import db" }- { pkg: "skraak/wav", desc: "datafile must not import wav" }- { pkg: "skraak/audio", desc: "datafile must not import audio" }- { pkg: "skraak/spectrogram", desc: "datafile must not import spectrogram" }- { pkg: "skraak/astro", desc: "datafile must not import astro" }
- { pkg: "skraak/cmd", desc: "spectrogram must not import cmd" }- { pkg: "skraak/tools", desc: "spectrogram must not import tools" }- { pkg: "skraak/tui", desc: "spectrogram must not import tui" }- { pkg: "skraak/db", desc: "spectrogram must not import db" }- { pkg: "skraak/utils", desc: "spectrogram should only depend on audio + wav" }- { pkg: "skraak/astro", desc: "spectrogram should only depend on audio + wav" }- { pkg: "skraak/mapping", desc: "spectrogram should only depend on audio + wav" }
- { pkg: "skraak/cmd", desc: "spectrogram must not import cmd" }- { pkg: "skraak/tools", desc: "spectrogram must not import tools" }- { pkg: "skraak/tui", desc: "spectrogram must not import tui" }- { pkg: "skraak/db", desc: "spectrogram must not import db" }- { pkg: "skraak/utils", desc: "spectrogram should only depend on audio + wav" }- { pkg: "skraak/astro", desc: "spectrogram should only depend on audio + wav" }- { pkg: "skraak/mapping", desc: "spectrogram should only depend on audio + wav" }- { pkg: "skraak/datafile", desc: "spectrogram should only depend on audio + wav" }
- { pkg: "skraak/cmd", desc: "db may only import wav + utils" }- { pkg: "skraak/tools", desc: "db may only import wav + utils" }- { pkg: "skraak/tui", desc: "db may only import wav + utils" }- { pkg: "skraak/spectrogram", desc: "db may only import wav + utils" }
- { pkg: "skraak/cmd", desc: "db may only import wav (GainLevel alias)" }- { pkg: "skraak/tools", desc: "db may only import wav (GainLevel alias)" }- { pkg: "skraak/tui", desc: "db may only import wav (GainLevel alias)" }- { pkg: "skraak/utils", desc: "db must not import utils (Placeholders is inlined)" }- { pkg: "skraak/spectrogram", desc: "db may only import wav (GainLevel alias)" }- { pkg: "skraak/datafile", desc: "db must not import datafile" }