replacement in utils/wav_metadata.go at line 67
[3.27177]→[3.27177:27510](∅→∅) − // ParseWAVHeader efficiently reads only the WAV file header to extract metadata.
− // It reads the first 200KB of the file, which should be sufficient for all header chunks.
− // ParseWAVHeader extracts metadata from WAV file including duration, sample rate, and INFO chunks
− func ParseWAVHeader(filepath string) (*WAVMetadata, error) {
+ // readAndParseHeader opens a WAV file, reads its header using the provided buffer pool,
+ // parses metadata, and sets file modification time and size.
+ func readAndParseHeader(filepath string, getBuf func() *[]byte, putBuf func(*[]byte)) (*WAVMetadata, error) {
edit in utils/wav_metadata.go at line 76
[3.27657]→[3.27657:27697](∅→∅) − // Get file info for modification time
edit in utils/wav_metadata.go at line 80
[3.27808]→[3.27808:27868](∅→∅) − modTime := fileInfo.ModTime()
− fileSize := fileInfo.Size()
replacement in utils/wav_metadata.go at line 81
[3.27869]→[3.27869:28024](∅→∅) − // Get header buffer from pool
− headerBufPtr := getHeaderBuffer()
− defer putHeaderBuffer(headerBufPtr)
− headerBuf := (*headerBufPtr)[:cap(*headerBufPtr)]
+ bufPtr := getBuf()
+ defer putBuf(bufPtr)
+ buf := (*bufPtr)[:cap(*bufPtr)]
replacement in utils/wav_metadata.go at line 85
[3.28025]→[3.28025:28129](∅→∅) − // Read first 200KB for header parsing (more than enough for metadata)
− n, err := file.Read(headerBuf)
+ n, err := file.Read(buf)
replacement in utils/wav_metadata.go at line 89
[3.28225]→[3.28225:28252](∅→∅) − headerBuf = headerBuf[:n]
replacement in utils/wav_metadata.go at line 91
[3.28253]→[3.28253:28300](∅→∅) − metadata, err := parseWAVFromBytes(headerBuf)
+ metadata, err := parseWAVFromBytes(buf)
edit in utils/wav_metadata.go at line 95
[3.28338]→[3.28338:28441](∅→∅) −
− // Set file modification time and size
− metadata.FileModTime = modTime
− metadata.FileSize = fileSize
edit in utils/wav_metadata.go at line 96
+ metadata.FileModTime = fileInfo.ModTime()
+ metadata.FileSize = fileInfo.Size()
edit in utils/wav_metadata.go at line 99
+ }
+
+ // ParseWAVHeader efficiently reads only the WAV file header to extract metadata.
+ // It reads the first 200KB of the file, which should be sufficient for all header chunks.
+ func ParseWAVHeader(filepath string) (*WAVMetadata, error) {
+ return readAndParseHeader(filepath, getHeaderBuffer, putHeaderBuffer)
replacement in utils/wav_metadata.go at line 112
[3.28917]→[3.28917:29566](∅→∅) − file, err := os.Open(filepath)
− if err != nil {
− return 0, 0, fmt.Errorf("failed to open file: %w", err)
− }
− defer func() { _ = file.Close() }()
−
− // Get minimal header buffer from pool (4KB)
− headerBufPtr := getMinimalHeaderBuffer()
− defer putMinimalHeaderBuffer(headerBufPtr)
− headerBuf := (*headerBufPtr)[:cap(*headerBufPtr)]
−
− // Read first 4KB - sufficient for fmt + data chunk headers in 99% of files
− n, err := file.Read(headerBuf)
− if err != nil && err != io.EOF {
− return 0, 0, fmt.Errorf("failed to read header: %w", err)
− }
− headerBuf = headerBuf[:n]
−
− // Parse minimal metadata
− sampleRate, duration, err = parseWAVMinimal(headerBuf)
+ metadata, err := readAndParseHeader(filepath, getMinimalHeaderBuffer, putMinimalHeaderBuffer)
edit in utils/wav_metadata.go at line 116
[3.29605]→[3.29605:29717](∅→∅),
[3.29717]→[3.7:141](∅→∅),
[3.141]→[3.29787:29869](∅→∅),
[3.29787]→[3.29787:29869](∅→∅),
[3.29869]→[3.142:220](∅→∅),
[3.220]→[3.31391:31394](∅→∅),
[3.31391]→[3.31391:31394](∅→∅) −
− return sampleRate, duration, nil
− }
−
− // parseWAVMinimal parses only essential WAV metadata from a byte buffer.
− // Returns (sampleRate, duration, error). Delegates to parseWAVFromBytes and
− // extracts just the fields needed for batch processing.
− func parseWAVMinimal(data []byte) (sampleRate int, duration float64, err error) {
− metadata, err := parseWAVFromBytes(data)
− if err != nil {
− return 0, 0, err
− }
edit in utils/wav_metadata.go at line 124
+ // The open/stat/read steps mirror readAndParseHeader but are kept inline here:
+ // the file handle must stay open after header parsing so the file can be hashed.
edit in utils/wav_metadata.go at line 132
[3.32065]→[3.32065:32114](∅→∅) − // Get file info for modification time and size
edit in utils/wav_metadata.go at line 136
[3.32229]→[3.32229:32289](∅→∅) − modTime := fileInfo.ModTime()
− fileSize := fileInfo.Size()
edit in utils/wav_metadata.go at line 137
[3.32290]→[3.32290:32322](∅→∅) − // Get header buffer from pool
edit in utils/wav_metadata.go at line 141
[3.32446]→[3.32446:32486](∅→∅) − // Read first 200KB for header parsing
edit in utils/wav_metadata.go at line 147
[3.32646]→[3.32646:32663](∅→∅) replacement in utils/wav_metadata.go at line 151
[3.32752]→[3.32752:32814](∅→∅) − metadata.FileModTime = modTime
− metadata.FileSize = fileSize
+ metadata.FileModTime = fileInfo.ModTime()
+ metadata.FileSize = fileInfo.Size()
edit in utils/wav_metadata.go at line 159
[3.32971]→[3.32971:33001](∅→∅) − // Get hash buffer from pool
edit in utils/file_import.go at line 10
+ // ImportStage identifies the pipeline stage where an error occurred.
+ type ImportStage string
+
+ const (
+ StageScan ImportStage = "scan" // directory scanning
+ StageHash ImportStage = "hash" // hash computation
+ StageParse ImportStage = "parse" // WAV header / filename parsing
+ StageProcess ImportStage = "process" // file processing
+ StageValidation ImportStage = "validation" // validation checks
+ StageInsert ImportStage = "insert" // database insertion
+ StageImport ImportStage = "import" // database import (segment pipeline)
+ )
+
replacement in utils/file_import.go at line 25
− FileName string `json:"file_name"`
− Error string `json:"error"`
− Stage string `json:"stage"` // "scan", "hash", "parse", "validate", "insert"
+ FileName string `json:"file_name"`
+ Error string `json:"error"`
+ Stage ImportStage `json:"stage"`
replacement in utils/cluster_import.go at line 42
[3.172658]→[3.172658:172944](∅→∅) − // fileData holds all data for a single file to be imported
− type fileData struct {
− FileName string
− Hash string
− Duration float64
− SampleRate int
− TimestampLocal time.Time
− IsAudioMoth bool
− MothData *AudioMothData
− AstroData AstronomicalData
− }
+ // Per-file import data is carried in FileProcessingResult (declared in
+ // file_import.go), shared by the single-file and cluster import pipelines.
replacement in utils/cluster_import.go at line 222
[2.5252]→[2.5252:5275](∅→∅) replacement in utils/cluster_import.go at line 234
[2.5605]→[2.5605:5628](∅→∅) replacement in utils/cluster_import.go at line 247
[2.5866]→[2.5866:5972](∅→∅) − func resolveFileData(info wavInfo, preParsedTime *time.Time, location *LocationData) (*fileData, error) {
+ func resolveFileData(info wavInfo, preParsedTime *time.Time, location *LocationData) (*FileProcessingResult, error) {
replacement in utils/cluster_import.go at line 260
[2.6256]→[2.6256:6275](∅→∅) + return &FileProcessingResult{
replacement in utils/cluster_import.go at line 273
[3.178522]→[3.178522:178650](∅→∅) − func batchProcessFiles(wavFiles []string, location *LocationData) ([]*fileData, []FileImportError) {
− var filesData []*fileData
+ func batchProcessFiles(wavFiles []string, location *LocationData) ([]*FileProcessingResult, []FileImportError) {
+ var filesData []*FileProcessingResult
replacement in utils/cluster_import.go at line 293
[3.179347]→[3.179347:179370](∅→∅) replacement in utils/cluster_import.go at line 328
[3.12067]→[3.182277:182300](∅→∅),
[3.182277]→[3.182277:182300](∅→∅) replacement in utils/cluster_import.go at line 343
[2.7206]→[2.7206:7221](∅→∅) + fd *FileProcessingResult,
replacement in utils/cluster_import.go at line 350
[2.7391]→[2.7391:7453](∅→∅) − return false, fmt.Errorf("duplicate check failed: %v", err)
+ return false, fmt.Errorf("duplicate check failed: %w", err)
replacement in utils/cluster_import.go at line 359
[2.7580]→[2.7580:7640](∅→∅) − return false, fmt.Errorf("ID generation failed: %v", err)
+ return false, fmt.Errorf("ID generation failed: %w", err)
replacement in utils/cluster_import.go at line 369
[2.7903]→[2.7903:7961](∅→∅) − return false, fmt.Errorf("file insert failed: %v", err)
+ return false, fmt.Errorf("file insert failed: %w", err)
replacement in utils/cluster_import.go at line 375
[3.183288]→[2.8066:8132](∅→∅) − return false, fmt.Errorf("file_dataset insert failed: %v", err)
+ return false, fmt.Errorf("file_dataset insert failed: %w", err)
replacement in utils/cluster_import.go at line 389
[2.8411]→[2.8411:8479](∅→∅) − return false, fmt.Errorf("moth_metadata insert failed: %v", err)
+ return false, fmt.Errorf("moth_metadata insert failed: %w", err)
replacement in utils/cluster_import.go at line 450
[2.9498]→[2.9498:9522](∅→∅) + filesData []*FileProcessingResult,
replacement in utils/cluster_import.go at line 472
[2.10180]→[3.185922:185946](∅→∅),
[3.185922]→[3.185922:185946](∅→∅) replacement in tools/import_unstructured.go at line 92
[3.310332]→[3.310332:310357](∅→∅) + Stage: utils.StageProcess,
replacement in tools/import_unstructured.go at line 244
[3.314502]→[3.314502:314524](∅→∅) + Stage: utils.StageScan,
replacement in tools/import_unstructured.go at line 270
[3.315038]→[3.315038:315060](∅→∅) + Stage: utils.StageScan,
replacement in tools/import_unstructured.go at line 280
[3.315282]→[3.315282:315304](∅→∅) + Stage: utils.StageScan,
replacement in tools/import_segments.go at line 67
[3.320277]→[3.320277:320415](∅→∅) − File string `json:"file,omitempty"`
− Stage string `json:"stage"` // "validation", "hash", "import"
− Message string `json:"message"`
+ File string `json:"file,omitempty"`
+ Stage utils.ImportStage `json:"stage"`
+ Message string `json:"message"`
replacement in tools/import_segments.go at line 302
[3.327484]→[3.327484:327511](∅→∅) + Stage: utils.StageValidation,
replacement in tools/import_segments.go at line 313
[3.327792]→[3.327792:327819](∅→∅) + Stage: utils.StageValidation,
replacement in tools/import_segments.go at line 484
[3.332386]→[3.332386:332407](∅→∅) + Stage: utils.StageHash,
replacement in tools/import_segments.go at line 501
[3.332868]→[3.332868:332895](∅→∅) + Stage: utils.StageValidation,
replacement in tools/import_segments.go at line 509
[3.333112]→[3.333112:333139](∅→∅) + Stage: utils.StageValidation,
replacement in tools/import_segments.go at line 526
[3.333648]→[3.333648:333675](∅→∅) + Stage: utils.StageValidation,
replacement in tools/import_segments.go at line 534
[3.333885]→[3.333885:333912](∅→∅) + Stage: utils.StageValidation,
replacement in tools/import_segments.go at line 551
[3.334374]→[3.334374:334401](∅→∅) + Stage: utils.StageValidation,
replacement in tools/import_segments.go at line 560
[3.334603]→[3.334603:334630](∅→∅) + Stage: utils.StageValidation,
replacement in tools/import_segments.go at line 604
[3.3797]→[3.3797:3851](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 612
[3.4056]→[3.4056:4110](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 620
[3.4304]→[3.4304:4358](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 628
[3.4558]→[3.4558:4612](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 639
[3.5016]→[3.5016:5070](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 653
[3.5608]→[3.5608:5663](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 701
[3.7013]→[3.7013:7067](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 709
[3.7244]→[3.7244:7298](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 720
[3.7679]→[3.7679:7733](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 749
[3.335798]→[3.335798:335820](∅→∅) + Stage: utils.StageImport,
replacement in tools/import_segments.go at line 784
[3.342841]→[3.8241:8298](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 800
[3.343340]→[3.343340:343362](∅→∅) + Stage: utils.StageImport,
replacement in tools/import_segments.go at line 826
[3.8956]→[3.8956:9010](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 834
[3.9241]→[3.9241:9295](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 843
[3.9557]→[3.9557:9611](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 855
[3.10092]→[3.10092:10146](∅→∅) − File: filepath.Base(sf.DataPath), Stage: "import",
+ File: filepath.Base(sf.DataPath), Stage: utils.StageImport,
replacement in tools/import_segments.go at line 906
[3.344184]→[3.344184:344207](∅→∅) + Stage: utils.StageImport,
replacement in tools/import_segments.go at line 940
[3.345067]→[3.345067:345090](∅→∅) + Stage: utils.StageImport,
edit in CHANGELOG.md at line 4
+
+ ## [2026-05-05] Utils refactoring: dedup types, fix error wrapping, consolidate WAV parsers, typed ImportStage
+
+ Four focused refactoring changes in utils/ and tools/:
+
+ - **Merged `fileData` into `FileProcessingResult`**: The two identical structs
+ (8 fields, same order, same types) in `cluster_import.go` and `file_import.go`
+ have been unified. `cluster_import.go` now uses the exported
+ `FileProcessingResult` everywhere. Removes a type, reduces mental overhead.
edit in CHANGELOG.md at line 14
+ - **Fixed `%v` → `%w` in `insertSingleFile`**: Five `fmt.Errorf` calls in
+ `cluster_import.go::insertSingleFile` used `%v` instead of `%w`, breaking
+ `errors.Is`/`errors.As` chains. Now consistent with the rest of the file.
+
+ - **Consolidated `ParseWAVHeader*` boilerplate**: Extracted `readAndParseHeader`
+ helper that handles open→stat→read→parse→set-modtime. `ParseWAVHeader` and
+ `ParseWAVHeaderMinimal` are now thin wrappers (2-4 lines each).
+ `ParseWAVHeaderWithHash` keeps its own open+hash logic (needs the file handle
+ for streaming). Removed now-unused `parseWAVMinimal`.
+
+ - **Typed `ImportStage` constants**: Defined `ImportStage` type with constants
+ (`StageScan`, `StageHash`, `StageParse`, `StageProcess`, `StageValidation`,
+ `StageInsert`, `StageImport`) in `file_import.go`. Both `FileImportError`
+ and `ImportSegmentError` now use `ImportStage` instead of `string`.
+ All usages in `cluster_import.go`, `import_segments.go`, and
+ `import_unstructured.go` updated. Prevents stage-name typos and defines the
+ stage set in one place.
+