more refactoring with glm
Dependencies
- [2] QVIGQOQZ more work on utils/ with glm
- [3] LBWQJEDH minor refactor and more tests for utils/
- [4] KZKLAINJ run out of space on nest, cleaned out
- [5] LQLC7S3A trying gemini: Inconsistent Standards in @utils/ refactoring
Change contents
- replacement in utils/wav_metadata.go at line 142
// Returns (sampleRate, duration, error). Does not parse INFO chunks.// Returns (sampleRate, duration, error). Delegates to parseWAVFromBytes and// extracts just the fields needed for batch processing. - replacement in utils/wav_metadata.go at line 145
if len(data) < 44 {return 0, 0, fmt.Errorf("file too small to be valid WAV")}// Verify RIFF headerif string(data[0:4]) != "RIFF" {return 0, 0, fmt.Errorf("not a valid WAV file (missing RIFF header)")}// Verify WAVE formatif string(data[8:12]) != "WAVE" {return 0, 0, fmt.Errorf("not a valid WAV file (missing WAVE format)")}var channels, bitsPerSample int// Parse chunks - stop after finding data chunkoffset := 12for offset < len(data)-8 {chunkID := string(data[offset : offset+4])chunkSize := int(binary.LittleEndian.Uint32(data[offset+4 : offset+8]))offset += 8switch chunkID {case "fmt ":// Parse format chunkif chunkSize >= 16 && offset+16 <= len(data) {channels = int(binary.LittleEndian.Uint16(data[offset+2 : offset+4]))sampleRate = int(binary.LittleEndian.Uint32(data[offset+4 : offset+8]))bitsPerSample = int(binary.LittleEndian.Uint16(data[offset+14 : offset+16]))}case "data":// Found data chunk - calculate duration and returnif sampleRate > 0 && channels > 0 && bitsPerSample > 0 {bytesPerSample := bitsPerSample / 8bytesPerSecond := sampleRate * channels * bytesPerSampleif bytesPerSecond > 0 {duration = float64(chunkSize) / float64(bytesPerSecond)return sampleRate, duration, nil}}return 0, 0, fmt.Errorf("invalid WAV: fmt chunk missing or corrupt before data chunk")}// Move to next chunk (word-aligned)offset += chunkSizeif chunkSize%2 != 0 {offset++}metadata, err := parseWAVFromBytes(data)if err != nil {return 0, 0, err - replacement in utils/wav_metadata.go at line 149
// Data chunk not found within 4KB - file may have large INFO chunksreturn 0, 0, fmt.Errorf("data chunk not found in first 4KB (try ParseWAVHeader for full parsing)")return metadata.SampleRate, metadata.Duration, nil - edit in utils/resample.go at line 9
return samples}// Calculate ratio: toRate/fromRate (e.g., 16000/250000 = 0.064)ratio := float64(toRate) / float64(fromRate)newLen := int(float64(len(samples)) * ratio)if newLen <= 0 { - edit in utils/resample.go at line 10
}result := make([]float64, newLen)for i := range newLen {// Source index in original samples (floating point)srcIdx := float64(i) / ratioidx0 := int(srcIdx)idx1 := idx0 + 1// Clamp to valid rangeif idx0 >= len(samples) {idx0 = len(samples) - 1}if idx1 >= len(samples) {idx1 = len(samples) - 1}// Linear interpolation between adjacent samplesfrac := srcIdx - float64(idx0)result[i] = samples[idx0]*(1-frac) + samples[idx1]*frac - replacement in utils/resample.go at line 11
return result// speed = fromRate/toRate: e.g. 250000/16000 = 15.625 (skip samples to downsample)return Resample(samples, float64(fromRate)/float64(toRate)) - edit in utils/mapping.go at line 4
"database/sql" - edit in utils/mapping.go at line 89
}// DBQueryer is an interface satisfied by *sql.DB and *sql.Txtype DBQueryer interface {Query(query string, args ...any) (*sql.Rows, error) - replacement in utils/mapping.go at line 94
queryer DBQueryer,queryer DB, - replacement in utils/file_import.go at line 124
// DBQueryable is an interface satisfied by both *sql.DB and *sql.Tx// for running duplicate hash checks against either.type DBQueryable interface {// DB is an interface satisfied by both *sql.DB and *sql.Tx.// Used throughout utils for database queries that must work with either.type DB interface {Query(query string, args ...any) (*sql.Rows, error) - replacement in utils/file_import.go at line 134
func CheckDuplicateHash(q DBQueryable, hash string) (existingID string, isDuplicate bool, err error) {func CheckDuplicateHash(q DB, hash string) (existingID string, isDuplicate bool, err error) { - replacement in utils/cluster_import.go at line 377[3.184587]→[3.184587:184637](∅→∅),[3.184637]→[2.902:981](∅→∅),[2.981]→[3.184698:184729](∅→∅),[3.184698]→[3.184698:184729](∅→∅)
var exists boolerr = tx.QueryRowContext(ctx,"SELECT EXISTS(SELECT 1 FROM file WHERE xxh64_hash = ? AND active = true)",fd.Hash,).Scan(&exists)_, isDuplicate, err := CheckDuplicateHash(tx, fd.Hash) - replacement in utils/cluster_import.go at line 387
if exists {if isDuplicate {