trying gemini: Inconsistent Standards in @utils/ refactoring

quietlight
Apr 30, 2026, 4:43 AM
LQLC7S3ADBR4O2JYVUSQJD65U3HG4ADOQBGB4F7KQCXUMNKMNEKAC

Dependencies

  • [2] KZKLAINJ run out of space on nest, cleaned out

Change contents

  • replacement in utils/wav_metadata.go at line 377
    [2.36719][2.36719:37000]()
    // ReadWAVSamples reads audio samples from a WAV file and returns them as float64.
    // Mono files: returns single channel.
    // Stereo files: returns left channel only.
    // Samples are normalized to the range -1.0 to 1.0.
    func ReadWAVSamples(filepath string) ([]float64, int, error) {
    [2.36719]
    [2.37000]
    // ReadWAVSegmentSamples reads a specific time range of audio samples from a WAV file.
    // If startSec < 0, it starts from 0.
    // If endSec <= 0 or endSec > duration, it reads to the end.
    func ReadWAVSegmentSamples(filepath string, startSec, endSec float64) ([]float64, int, error) {
  • replacement in utils/wav_metadata.go at line 432
    [2.38501][2.38501:38556]()
    dataOffset, _ = file.Seek(0, 1) // Current position
    [2.38501]
    [2.38556]
    dataOffset, _ = file.Seek(0, io.SeekCurrent)
  • edit in utils/wav_metadata.go at line 434
    [2.38580][2.38580:38617]()
    // Done - we found the data chunk
  • replacement in utils/wav_metadata.go at line 437
    [2.38647][2.38647:38726]()
    // Skip unknown chunk
    if _, err := file.Seek(chunkSize, 1); err != nil {
    [2.38647]
    [2.38726]
    if _, err := file.Seek(chunkSize, io.SeekCurrent); err != nil {
  • replacement in utils/wav_metadata.go at line 444
    [2.38839][2.38839:38885]()
    if _, err := file.Seek(1, 1); err != nil {
    [2.38839]
    [2.38885]
    if _, err := file.Seek(1, io.SeekCurrent); err != nil {
  • replacement in utils/wav_metadata.go at line 457
    [2.39162][2.39162:39298]()
    // Read audio data
    if _, err := file.Seek(dataOffset, 0); err != nil {
    return nil, 0, fmt.Errorf("failed to seek to data: %w", err)
    [2.39162]
    [2.39298]
    bytesPerSample := bitsPerSample / 8
    blockAlign := bytesPerSample * channels
    startOffset := int64(0)
    var readSize int64
    if startSec > 0 {
    startSample := int64(startSec * float64(sampleRate))
    startOffset = startSample * int64(blockAlign)
    if startOffset > dataSize {
    startOffset = dataSize
    }
  • replacement in utils/wav_metadata.go at line 471
    [2.39302][2.39302:39339]()
    audioData := make([]byte, dataSize)
    [2.39302]
    [2.39339]
    if endSec > 0 {
    endSample := int64(endSec * float64(sampleRate))
    endOffset := endSample * int64(blockAlign)
    if endOffset > dataSize {
    endOffset = dataSize
    }
    if endOffset > startOffset {
    readSize = endOffset - startOffset
    } else {
    readSize = 0
    }
    } else {
    readSize = dataSize - startOffset
    }
    if readSize == 0 {
    return []float64{}, sampleRate, nil
    }
    if _, err := file.Seek(dataOffset+startOffset, io.SeekStart); err != nil {
    return nil, 0, fmt.Errorf("failed to seek to data segment: %w", err)
    }
    audioData := make([]byte, readSize)
  • replacement in utils/wav_metadata.go at line 496
    [2.39396][2.39396:39462]()
    return nil, 0, fmt.Errorf("failed to read audio data: %w", err)
    [2.39396]
    [2.39462]
    // If we hit EOF unexpectedly, we just use what we read
    if err != io.EOF && err != io.ErrUnexpectedEOF {
    return nil, 0, fmt.Errorf("failed to read audio data: %w", err)
    }
  • edit in utils/wav_metadata.go at line 508
    [2.39599]
    [2.39599]
    // ReadWAVSamples loads every audio sample of the WAV file at filepath.
    //
    // It is a convenience wrapper around ReadWAVSegmentSamples: a start of 0 and
    // an end of 0 ask that function for the whole data chunk, so the contract is
    // the one documented there for a full-file read:
    //   - mono files yield their single channel;
    //   - stereo files yield the left channel only;
    //   - samples are float64 values normalized to the range -1.0 to 1.0.
    //
    // The returned values are the samples, the sample rate, and any error
    // reported by ReadWAVSegmentSamples.
    func ReadWAVSamples(filepath string) ([]float64, int, error) {
    	// Zero for both bounds means "no lower bound, read to the end".
    	const (
    		wholeFileStart = 0
    		wholeFileEnd   = 0
    	)
    	samples, sampleRate, err := ReadWAVSegmentSamples(filepath, wholeFileStart, wholeFileEnd)
    	return samples, sampleRate, err
    }
  • edit in utils/validation.go at line 4
    [2.45875][2.45875:45891]()
    "database/sql"
  • edit in utils/validation.go at line 104
    [2.49099][2.49099:51461]()
    }
    return nil
    }
    // GetDatasetType returns the type of a dataset
    // Returns: (type, exists, error)
    func GetDatasetType(db *sql.DB, datasetID string) (string, bool, error) {
    var datasetType string
    err := db.QueryRow("SELECT type FROM dataset WHERE id = ?", datasetID).Scan(&datasetType)
    if err == sql.ErrNoRows {
    return "", false, nil
    }
    if err != nil {
    return "", false, err
    }
    return datasetType, true, nil
    }
    // ValidateDatasetTypeForImport checks that a dataset is 'structured' type for file imports
    // Returns error if dataset doesn't exist or is not 'structured'
    func ValidateDatasetTypeForImport(db *sql.DB, datasetID string) error {
    datasetType, exists, err := GetDatasetType(db, datasetID)
    if err != nil {
    return fmt.Errorf("failed to query dataset type: %w", err)
    }
    if !exists {
    return fmt.Errorf("dataset not found: %s", datasetID)
    }
    if datasetType != "structured" {
    return fmt.Errorf("dataset '%s' is type '%s' - file imports only support 'structured' datasets", datasetID, datasetType)
    }
    return nil
    }
    // ValidateDatasetTypeUnstructured checks that a dataset is 'unstructured' type
    // Returns error if dataset doesn't exist or is not 'unstructured'
    func ValidateDatasetTypeUnstructured(db *sql.DB, datasetID string) error {
    datasetType, exists, err := GetDatasetType(db, datasetID)
    if err != nil {
    return fmt.Errorf("failed to query dataset type: %w", err)
    }
    if !exists {
    return fmt.Errorf("dataset not found: %s", datasetID)
    }
    if datasetType != "unstructured" {
    return fmt.Errorf("dataset '%s' is type '%s' - this command only supports 'unstructured' datasets", datasetID, datasetType)
    }
    return nil
    }
    // ValidateLocationBelongsToDataset checks that a location belongs to a specific dataset
    // Returns error if location doesn't exist or belongs to a different dataset
    func ValidateLocationBelongsToDataset(db *sql.DB, locationID, datasetID string) error {
    var locationDatasetID string
    err := db.QueryRow("SELECT dataset_id FROM location WHERE id = ? AND active = true", locationID).Scan(&locationDatasetID)
    if err == sql.ErrNoRows {
    return fmt.Errorf("location not found or inactive: %s", locationID)
    }
    if err != nil {
    return fmt.Errorf("failed to query location: %w", err)
    }
    if locationDatasetID != datasetID {
    return fmt.Errorf("location %s does not belong to dataset %s", locationID, datasetID)
  • replacement in utils/spectrogram.go at line 194
    [2.68133][2.68133:68207]()
    // Read WAV samples
    samples, sampleRate, err := ReadWAVSamples(wavPath)
    [2.68133]
    [2.68207]
    // Read only the requested segment's samples from the WAV file
    segSamples, sampleRate, err := ReadWAVSegmentSamples(wavPath, startTime, endTime)
  • edit in utils/spectrogram.go at line 200
    [2.68246][2.68246:68352]()
    // Extract segment samples
    segSamples := ExtractSegmentSamples(samples, sampleRate, startTime, endTime)
  • edit in utils/mapping.go at line 10
    [2.89645]
    [2.89645]
    "skraak/db"
  • edit in utils/mapping.go at line 90
    [2.92000]
    [2.92000]
    }
    // DBQueryer is an interface satisfied by *sql.DB and *sql.Tx
    type DBQueryer interface {
    Query(query string, args ...any) (*sql.Rows, error)
  • replacement in utils/mapping.go at line 100
    [2.92217][2.92217:92230]()
    db *sql.DB,
    [2.92217]
    [2.92230]
    queryer DBQueryer,
  • replacement in utils/mapping.go at line 169
    [2.94174][2.94174:94289]()
    query := `SELECT label FROM species WHERE label IN (` + Placeholders(len(speciesLabels)) + `) AND active = true`
    [2.94174]
    [2.94289]
    query := `SELECT label FROM species WHERE label IN (` + db.Placeholders(len(speciesLabels)) + `) AND active = true`
  • replacement in utils/mapping.go at line 175
    [2.94387][2.94387:94427]()
    rows, err := db.Query(query, args...)
    [2.94387]
    [2.94427]
    rows, err := queryer.Query(query, args...)
  • replacement in utils/mapping.go at line 212
    [2.95199][2.95199:95296]()
    WHERE s.label = ? AND ct.label IN (` + Placeholders(len(ctLabels)) + `) AND ct.active = true`
    [2.95199]
    [2.95296]
    WHERE s.label = ? AND ct.label IN (` + db.Placeholders(len(ctLabels)) + `) AND ct.active = true`
  • replacement in utils/mapping.go at line 219
    [2.95412][2.95412:95452]()
    rows, err := db.Query(query, args...)
    [2.95412]
    [2.95452]
    rows, err := queryer.Query(query, args...)
  • edit in utils/mapping.go at line 339
    [2.98723][2.98723:98946]()
    }
    // placeholders generates SQL placeholder string for IN clauses
    func Placeholders(n int) string {
    if n == 0 {
    return ""
    }
    ph := make([]string, n)
    for i := range ph {
    ph[i] = "?"
    }
    return strings.Join(ph, ", ")
  • edit in utils/data_file.go at line 8
    [2.157989]
    [2.157989]
    "path/filepath"
  • replacement in utils/data_file.go at line 346
    [2.165691][2.165691:165733]()
    files = append(files, folder+"/"+name)
    [2.165691]
    [2.165733]
    files = append(files, filepath.Join(folder, name))
  • replacement in tools/import_unstructured.go at line 227
    [2.314007][2.314007:314097]()
    if err := utils.ValidateDatasetTypeUnstructured(database, input.DatasetID); err != nil {
    [2.314007]
    [2.314097]
    if err := db.ValidateDatasetTypeUnstructured(database, input.DatasetID); err != nil {
  • replacement in tools/import_segments.go at line 332
    [2.328945][2.328945:329058]()
    query := `SELECT id, name FROM filter WHERE name IN (` + utils.Placeholders(len(names)) + `) AND active = true`
    [2.328945]
    [2.329058]
    query := `SELECT id, name FROM filter WHERE name IN (` + db.Placeholders(len(names)) + `) AND active = true`
  • replacement in tools/import_segments.go at line 391
    [2.330561][2.330561:330686]()
    query := `SELECT id, label FROM species WHERE label IN (` + utils.Placeholders(len(dbSpeciesList)) + `) AND active = true`
    [2.330561]
    [2.330686]
    query := `SELECT id, label FROM species WHERE label IN (` + db.Placeholders(len(dbSpeciesList)) + `) AND active = true`
  • replacement in tools/import_files.go at line 148
    [2.349995][2.349995:350076]()
    if err := utils.ValidateDatasetTypeForImport(database, datasetID); err != nil {
    [2.349995]
    [2.350076]
    if err := db.ValidateDatasetTypeForImport(database, datasetID); err != nil {
  • replacement in tools/bulk_file_import.go at line 300
    [2.632715][2.632715:632802]()
    if err := utils.ValidateDatasetTypeForImport(database, input.DatasetID); err != nil {
    [2.632715]
    [2.632802]
    if err := db.ValidateDatasetTypeForImport(database, input.DatasetID); err != nil {
  • replacement in tools/bulk_file_import.go at line 319
    [2.633317][2.633317:633413]()
    if err := utils.ValidateLocationBelongsToDataset(dbConn, locationID, datasetID); err != nil {
    [2.633317]
    [2.633413]
    if err := db.ValidateLocationBelongsToDataset(dbConn, locationID, datasetID); err != nil {
  • edit in me.txt at line 561
    [2.772785]
    Here is a brief report based on a comprehensive review of the utils directory.
    ### 1. Duplicated Functionality
    - Audio Processing (Float64 to 16-bit PCM): wav_writer.go and audio_player.go each contain the same
    logic for converting float64 arrays into little-endian int16 bytes (including clamping to
    [-1.0, 1.0] and scaling by 32767).
    - Timestamp Resolution Strategy: cluster_import.go (batchProcessFiles) manually reimplements the
    entire fallback chain for timestamp resolution (AudioMoth → Filename → File Modification Time). This
    exact logic is already neatly abstracted in file_import.go as ResolveTimestamp().
    - Directory Scanning: cluster_import.go implements its own scanClusterFiles to find .wav files, while
    data_file.go implements FindDataFiles to find .data files. These could be consolidated into a
    generic, reusable directory walker.
    ### 2. Untested Code
    Overall statement coverage for utils is at 46.5%. The following areas are completely untested (0%
    coverage):
    - Files with 0% coverage:
    - audio_player.go (Audio playback and context management)
    - config.go (JSON config file loading/parsing)
    - cluster_import.go (The core batch import logic and database transactions)
    - spectrogram.go (FFT operations, windowing, and image rendering)
    - wav_writer.go (WAV encoding logic)
    - Untested Critical Functions in otherwise tested files:
    - file_import.go: ProcessSingleFile, CheckDuplicateHash
    - mapping.go: ValidateMappingAgainstDB, Classify, ValidateCoversSpecies, Classes, Placeholders
    - validation.go: GetDatasetType, ValidateDatasetTypeForImport, ValidateDatasetTypeUnstructured,
    ValidateLocationBelongsToDataset
    ### 3. Inconsistent Standards
    - Memory Inefficiency in Spectrograms: In spectrogram.go, GenerateSegmentSpectrogram loads the entire
    WAV file into memory using ReadWAVSamples(wavPath) before calling ExtractSegmentSamples. If a user
    requests a 3-second segment from a 500MB continuous recording, the process will unnecessarily
    allocate the whole file.
    - Path Construction: In data_file.go (FindDataFiles), paths are constructed using string
    concatenation (folder+"/"+name) instead of the standard library's filepath.Join; path
    construction is handled correctly elsewhere in the codebase.
    - Separation of Concerns (DB vs Utils): Pure utility files are tightly coupled with the database. For
    example, validation.go mixes pure string/numeric assertions (ValidateShortID) with stateful database
    queries (e.g. ValidateLocationBelongsToDataset). Similarly, mapping.go contains
    ValidateMappingAgainstDB. These queries belong in a db package or should rely on injected interfaces
    rather than hardcoding *sql.DB dependencies into utils/.
    - Misplaced Helpers: The SQL utility function Placeholders(n int) is declared inside mapping.go,
    where it is unrelated to the surrounding mapping logic, instead of in a dedicated database or
    query utility file.
  • file addition: validation.go (----------)
    [2.790921]
    package db
    import (
    "database/sql"
    "fmt"
    )
    // GetDatasetType looks up the type column of the dataset row whose id is
    // datasetID.
    //
    // Returns (type, exists, error):
    //   - (type, true, nil) when the row was found;
    //   - ("", false, nil) when no row matches datasetID;
    //   - ("", false, err) when the query or scan fails for any other reason.
    func GetDatasetType(database *sql.DB, datasetID string) (string, bool, error) {
    	const query = "SELECT type FROM dataset WHERE id = ?"

    	var kind string
    	switch err := database.QueryRow(query, datasetID).Scan(&kind); err {
    	case nil:
    		return kind, true, nil
    	case sql.ErrNoRows:
    		// A missing dataset is reported through the bool, not as an error.
    		return "", false, nil
    	default:
    		return "", false, err
    	}
    }
    // ValidateDatasetTypeForImport reports whether the dataset identified by
    // datasetID may receive file imports, which requires its type to be
    // 'structured'.
    //
    // It returns nil when the dataset exists and is 'structured'. Otherwise it
    // returns an error describing the first failing check: the type lookup
    // failed (wrapped), the dataset does not exist, or the dataset has another
    // type.
    func ValidateDatasetTypeForImport(database *sql.DB, datasetID string) error {
    	kind, found, err := GetDatasetType(database, datasetID)
    	switch {
    	case err != nil:
    		return fmt.Errorf("failed to query dataset type: %w", err)
    	case !found:
    		return fmt.Errorf("dataset not found: %s", datasetID)
    	case kind != "structured":
    		return fmt.Errorf("dataset '%s' is type '%s' - file imports only support 'structured' datasets", datasetID, kind)
    	}
    	return nil
    }
    // ValidateDatasetTypeUnstructured reports whether the dataset identified by
    // datasetID has type 'unstructured'.
    //
    // It returns nil when the dataset exists and is 'unstructured'. Otherwise it
    // returns an error describing the first failing check: the type lookup
    // failed (wrapped), the dataset does not exist, or the dataset has another
    // type.
    func ValidateDatasetTypeUnstructured(database *sql.DB, datasetID string) error {
    	kind, found, lookupErr := GetDatasetType(database, datasetID)
    	if lookupErr != nil {
    		return fmt.Errorf("failed to query dataset type: %w", lookupErr)
    	}

    	switch {
    	case !found:
    		return fmt.Errorf("dataset not found: %s", datasetID)
    	case kind == "unstructured":
    		return nil
    	default:
    		return fmt.Errorf("dataset '%s' is type '%s' - this command only supports 'unstructured' datasets", datasetID, kind)
    	}
    }
    // ValidateLocationBelongsToDataset verifies that the active location
    // identified by locationID is attached to the dataset identified by
    // datasetID.
    //
    // It returns nil on a match. It returns an error when no active location row
    // has that id, when the query itself fails (wrapped), or when the location's
    // dataset_id differs from datasetID.
    func ValidateLocationBelongsToDataset(database *sql.DB, locationID, datasetID string) error {
    	const query = "SELECT dataset_id FROM location WHERE id = ? AND active = true"

    	var owner string
    	switch err := database.QueryRow(query, locationID).Scan(&owner); {
    	case err == sql.ErrNoRows:
    		// The query filters on active = true, so an inactive location
    		// looks the same as a missing one here.
    		return fmt.Errorf("location not found or inactive: %s", locationID)
    	case err != nil:
    		return fmt.Errorf("failed to query location: %w", err)
    	case owner != datasetID:
    		return fmt.Errorf("location %s does not belong to dataset %s", locationID, datasetID)
    	}
    	return nil
    }
  • file addition: utils.go (----------)
    [2.790921]
    package db
    import "strings"
    // Placeholders returns n SQL positional placeholders joined by ", ", ready
    // to be spliced between the parentheses of an IN (...) clause. For example,
    // Placeholders(3) returns "?, ?, ?".
    //
    // It returns the empty string when n is zero or negative. A negative count
    // previously reached make([]string, n) and panicked; it is now treated like
    // an empty list.
    func Placeholders(n int) string {
    	if n <= 0 {
    		return ""
    	}
    	// n-1 copies of "?, " followed by a final "?" avoids building and
    	// joining an intermediate slice.
    	return strings.Repeat("?, ", n-1) + "?"
    }