package utils
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"os"
)
// WAVMetadata holds the metadata that ParseWAVHeader extracts from the header
// chunks of a WAV (RIFF/WAVE) file. A field keeps its zero value when the chunk
// it is read from was not found in the inspected part of the file.
type WAVMetadata struct {
Duration float64 // Length of the audio in seconds, computed from the data size and the format fields below
SampleRate int // Sample rate in Hz, read from the "fmt " chunk
Comment string // ICMT value from the LIST/INFO chunk (may contain AudioMoth data)
Artist string // IART value from the LIST/INFO chunk
Channels int // Number of audio channels, read from the "fmt " chunk
BitsPerSample int // Bits per sample, read from the "fmt " chunk
}
// ParseWAVHeader reads only the leading bytes of the WAV file at filepath and
// extracts its metadata without loading the audio payload.
//
// At most the first 200KB of the file are read, which should be sufficient for
// all header chunks; the bytes read are then handed to parseWAVFromBytes.
//
// It returns an error when the file cannot be opened or read, or when
// parseWAVFromBytes rejects the content.
func ParseWAVHeader(filepath string) (*WAVMetadata, error) {
	// Upper bound on how much of the file is inspected for header chunks.
	const maxHeaderBytes = 200 * 1024

	file, err := os.Open(filepath)
	if err != nil {
		return nil, fmt.Errorf("failed to open file: %w", err)
	}
	defer file.Close()

	// io.ReadFull keeps reading until the buffer is full or the file ends. A
	// single Read call is allowed to return fewer bytes than are available,
	// which would silently truncate the header.
	headerBuf := make([]byte, maxHeaderBytes)
	n, err := io.ReadFull(file, headerBuf)
	// io.EOF (empty file) and io.ErrUnexpectedEOF (file shorter than the
	// buffer) are the normal outcomes for small files, not failures.
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return nil, fmt.Errorf("failed to read header: %w", err)
	}

	return parseWAVFromBytes(headerBuf[:n], filepath)
}
// parseWAVFromBytes parses WAV metadata from data, a buffer holding the leading
// bytes of a RIFF/WAVE file. The buffer does not need to contain the audio
// payload.
//
// It walks the top-level RIFF chunks and reads:
//   - "fmt ": channel count, sample rate and bits per sample;
//   - "data": the declared payload size, used to compute Duration;
//   - "LIST" of type "INFO": handed to parseINFOChunk.
//
// The size of the data chunk comes from its 8-byte header, so Duration is
// computed even when the payload itself runs past the end of data. In that
// case the file at filepath is consulted (best effort) to cap the declared
// size at the number of bytes actually present on disk.
//
// If no usable data chunk size is found, Duration is estimated from the size
// of the file at filepath, assuming roughly 100 bytes of header. If that file
// cannot be inspected either, Duration stays 0.
//
// An error is returned when data is shorter than 44 bytes or does not start
// with the RIFF/WAVE signature.
func parseWAVFromBytes(data []byte, filepath string) (*WAVMetadata, error) {
	const (
		minHeaderSize     = 44  // smallest buffer accepted as a WAV header
		chunkHeaderSize   = 8   // 4-byte chunk ID + 4-byte little-endian size
		assumedHeaderSize = 100 // header size guess used by the file-size estimate
	)

	if len(data) < minHeaderSize {
		return nil, fmt.Errorf("file too small to be valid WAV")
	}
	if string(data[0:4]) != "RIFF" {
		return nil, fmt.Errorf("not a valid WAV file (missing RIFF header)")
	}
	if string(data[8:12]) != "WAVE" {
		return nil, fmt.Errorf("not a valid WAV file (missing WAVE format)")
	}

	metadata := &WAVMetadata{}

	var (
		haveData  bool
		dataStart int64 // offset of the data payload within the file
		dataSize  int64 // payload size declared in the data chunk header
	)

	// Chunks start right after the 12-byte "RIFF" <size> "WAVE" preamble.
	offset := 12
	for offset+chunkHeaderSize <= len(data) {
		chunkID := string(data[offset : offset+4])
		// Kept as int64 so a large uint32 size cannot wrap on 32-bit platforms.
		chunkSize := int64(binary.LittleEndian.Uint32(data[offset+4 : offset+8]))
		offset += chunkHeaderSize

		// Record the data chunk before the bounds check below: only its
		// declared size is needed, and its payload is normally far larger
		// than the header buffer.
		if chunkID == "data" {
			haveData, dataStart, dataSize = true, int64(offset), chunkSize
		}

		// The remaining cases read the chunk payload, so it must be fully buffered.
		if chunkSize > int64(len(data)-offset) {
			break
		}
		end := offset + int(chunkSize)

		switch chunkID {
		case "fmt ":
			// Layout: audioFormat(2) channels(2) sampleRate(4) byteRate(4)
			// blockAlign(2) bitsPerSample(2).
			if chunkSize >= 16 {
				metadata.Channels = int(binary.LittleEndian.Uint16(data[offset+2 : offset+4]))
				metadata.SampleRate = int(binary.LittleEndian.Uint32(data[offset+4 : offset+8]))
				metadata.BitsPerSample = int(binary.LittleEndian.Uint16(data[offset+14 : offset+16]))
			}
		case "LIST":
			// Only LIST chunks of type INFO carry the comment/artist metadata.
			if chunkSize >= 4 && string(data[offset:offset+4]) == "INFO" {
				parseINFOChunk(data[offset+4:end], metadata)
			}
		}

		// Chunks are word-aligned: an odd-sized payload is followed by a pad byte.
		offset = end
		if chunkSize%2 != 0 {
			offset++
		}
	}

	// Computed in int64 so the product cannot overflow a 32-bit int.
	bytesPerSecond := int64(metadata.SampleRate) * int64(metadata.Channels) * int64(metadata.BitsPerSample/8)
	if bytesPerSecond <= 0 {
		// Without a usable format description no duration can be derived.
		return metadata, nil
	}

	if haveData && dataSize > 0 {
		// When the payload extends past the buffer, the declared size cannot
		// be checked against data. Cap it at what the file really holds so a
		// truncated file or a placeholder size does not inflate the duration.
		if dataStart+dataSize > int64(len(data)) {
			if info, err := os.Stat(filepath); err == nil {
				if onDisk := info.Size() - dataStart; onDisk >= 0 && onDisk < dataSize {
					dataSize = onDisk
				}
			}
		}
		metadata.Duration = float64(dataSize) / float64(bytesPerSecond)
	}

	// If duration couldn't be calculated from the data chunk, estimate it
	// from the file size. Files smaller than the assumed header yield 0
	// rather than a negative duration.
	if metadata.Duration == 0 {
		if info, err := os.Stat(filepath); err == nil {
			if estimate := info.Size() - assumedHeaderSize; estimate > 0 {
				metadata.Duration = float64(estimate) / float64(bytesPerSecond)
			}
		}
	}

	return metadata, nil
}
// parseINFOChunk scans the body of a LIST/INFO chunk (the bytes that follow the
// "INFO" type tag) and copies the ICMT (comment) and IART (artist) values into
// metadata. All other subchunks are skipped.
//
// Each subchunk consists of a 4-byte ID, a 4-byte little-endian size and a
// payload that is padded to an even length. Parsing stops quietly at the first
// subchunk whose declared size runs past the end of data; values found before
// that point are kept.
func parseINFOChunk(data []byte, metadata *WAVMetadata) {
	const subchunkHeaderSize = 8 // 4-byte ID + 4-byte size

	offset := 0
	for offset+subchunkHeaderSize <= len(data) {
		subchunkID := string(data[offset : offset+4])
		// Kept as int64 so a large uint32 size cannot wrap on 32-bit platforms.
		subchunkSize := int64(binary.LittleEndian.Uint32(data[offset+4 : offset+8]))
		offset += subchunkHeaderSize

		// Compare against the bytes remaining instead of adding to offset,
		// which avoids overflow for hostile sizes.
		if subchunkSize > int64(len(data)-offset) {
			break
		}
		end := offset + int(subchunkSize)

		// Values are stored as null-terminated strings; only the fields of
		// interest are converted.
		switch subchunkID {
		case "ICMT": // Comment
			metadata.Comment = extractNullTerminatedString(data[offset:end])
		case "IART": // Artist
			metadata.Artist = extractNullTerminatedString(data[offset:end])
		}

		// Subchunks are word-aligned: skip the pad byte after an odd-sized payload.
		offset = end + int(subchunkSize%2)
	}
}
// extractNullTerminatedString converts data to a string, cutting it off just
// before the first NUL byte. When data contains no NUL byte the whole slice is
// converted.
func extractNullTerminatedString(data []byte) string {
	end := bytes.IndexByte(data, 0)
	if end < 0 {
		// No terminator present: keep everything.
		end = len(data)
	}
	return string(data[:end])
}