P6OU2H3DSB5V53JKM2GZ3IXSNSUF23YRXQRWSEJ2U3ARCR5TP4IQC // ComputeXXH64Streaming computes the XXH64 hash of a file using streaming I/O.// This uses constant memory (~128KB buffer) regardless of file size.// Returns the hash as a 16-character lowercase hexadecimal string.func ComputeXXH64Streaming(filepath string) (string, error) {file, err := os.Open(filepath)if err != nil {return "", fmt.Errorf("failed to open file: %w", err)}defer file.Close()// Get hash buffer from poolhashBufPtr := getHashBuffer()defer putHashBuffer(hashBufPtr)h := xxhash.New()if _, err := io.CopyBuffer(h, file, *hashBufPtr); err != nil {return "", fmt.Errorf("failed to read file: %w", err)}// Format as 16-character lowercase hex with zero-paddingreturn fmt.Sprintf("%016x", h.Sum64()), nil}
// Pooled scratch buffers, reused across calls to reduce GC pressure during
// batch imports.
var (
	// headerBufferPool hands out pointers to 200KB byte slices used for WAV
	// header reading. Pointers are stored (rather than slices) so that putting
	// a buffer back does not allocate a new interface value for the slice header.
	headerBufferPool = sync.Pool{
		New: func() interface{} {
			const headerBufferSize = 200 * 1024
			fresh := new([]byte)
			*fresh = make([]byte, headerBufferSize)
			return fresh
		},
	}
)

// getHeaderBuffer borrows a header buffer from headerBufferPool; a new 200KB
// buffer is allocated when the pool has none available.
func getHeaderBuffer() *[]byte {
	pooled := headerBufferPool.Get()
	return pooled.(*[]byte)
}

// putHeaderBuffer hands buf back to headerBufferPool so it can be reused.
func putHeaderBuffer(buf *[]byte) {
	headerBufferPool.Put(buf)
}
// ParseWAVHeaderWithHash reads the WAV file once to extract both metadata and hash.// This is more efficient than calling ParseWAVHeader and ComputeXXH64 separately,// as it only opens the file once and reads it in a single pass.// Returns (metadata, hash, error).func ParseWAVHeaderWithHash(filepath string) (*WAVMetadata, string, error) {file, err := os.Open(filepath)if err != nil {return nil, "", fmt.Errorf("failed to open file: %w", err)}defer file.Close()// Get file info for modification time and sizefileInfo, err := file.Stat()if err != nil {return nil, "", fmt.Errorf("failed to get file info: %w", err)}modTime := fileInfo.ModTime()fileSize := fileInfo.Size()// Get header buffer from poolheaderBufPtr := getHeaderBuffer()defer putHeaderBuffer(headerBufPtr)headerBuf := (*headerBufPtr)[:cap(*headerBufPtr)]// Read first 200KB for header parsingn, err := file.Read(headerBuf)if err != nil && err != io.EOF {return nil, "", fmt.Errorf("failed to read header: %w", err)}headerBuf = headerBuf[:n]// Parse headermetadata, err := parseWAVFromBytes(headerBuf)if err != nil {return nil, "", err}metadata.FileModTime = modTimemetadata.FileSize = fileSize// Hash: seek back to start and stream entire fileif _, err := file.Seek(0, 0); err != nil {return nil, "", fmt.Errorf("failed to seek: %w", err)}// Get hash buffer from poolhashBufPtr := getHashBuffer()defer putHashBuffer(hashBufPtr)hashBuf := *hashBufPtrh := xxhash.New()if _, err := io.CopyBuffer(h, file, hashBuf); err != nil {return nil, "", fmt.Errorf("failed to read file for hash: %w", err)}hash := fmt.Sprintf("%016x", h.Sum64())return metadata, hash, nil}
if fileInfo, err := os.Stat(filepath); err == nil {// Estimate: fileSize - header (assume ~100 bytes header)dataSize := fileInfo.Size() - 100if metadata.SampleRate > 0 && metadata.Channels > 0 && metadata.BitsPerSample > 0 {bytesPerSample := metadata.BitsPerSample / 8bytesPerSecond := metadata.SampleRate * metadata.Channels * bytesPerSampleif bytesPerSecond > 0 {metadata.Duration = float64(dataSize) / float64(bytesPerSecond)}}}
return nil, fmt.Errorf("invalid WAV file: missing or corrupt data chunk")