I2ZXQQGAMI6LUKJWOTOO55QKTMNHGSONM7QO7OU5HRGDYDVK6INAC 7IBDR2ANO7UI2RHDXZDHCZWZ7QO6LTOZJQOWIU6YWK35LPDAWHVQC NY5P6UPQVIVTLYRKDHSRZPZRM6KTV3VDYGJP6FSEWG6BXOTW6U3AC VZGXBNYYO3E7EPFQ4GOLNVMRXXTQDDQZUU2BZ6JHNBDY4B2QLDAAC P6OU2H3DSB5V53JKM2GZ3IXSNSUF23YRXQRWSEJ2U3ARCR5TP4IQC IFVRAERTCCDICNTYTG3TX2WASB6RXQQEJWWXQMQZJSQDQ3HLE5OQC NAZQZRYQTXWVE2VFY65ONSD6O3EUMNRHARCDVH2D2HKM3YH4RGUAC t.Errorf("Hashes are not deterministic: %s, %s, %s", hash1, hash2, hash3)}t.Logf("Hash is deterministic: %s", hash1)}func TestComputeXXH64Streaming_Equivalence(t *testing.T) {wavFile := filepath.Join("..", "audio", "N14-2025-02-25-20241116_054500-685-703.wav")// Test that streaming version produces same result as memory versionhashOriginal, err := ComputeXXH64(wavFile)if err != nil {t.Fatalf("ComputeXXH64() error = %v", err)}hashStreaming, err := ComputeXXH64Streaming(wavFile)if err != nil {t.Fatalf("ComputeXXH64Streaming() error = %v", err)}if hashOriginal != hashStreaming {t.Errorf("Streaming hash differs from original: original=%s streaming=%s", hashOriginal, hashStreaming)
t.Errorf("hashes not deterministic: %s, %s, %s", hash1, hash2, hash3)
func TestComputeXXH64Streaming_WAVFile(t *testing.T) {// Test streaming version with the same WAV filewavFile := filepath.Join("..", "audio", "N14-2025-02-25-20241116_054500-685-703.wav")hash, err := ComputeXXH64Streaming(wavFile)if err != nil {t.Fatalf("ComputeXXH64Streaming() error = %v", err)}expectedHash := "48dc1684324621de"if hash != expectedHash {t.Errorf("ComputeXXH64Streaming() = %v, want %v", hash, expectedHash)}t.Logf("Go XXH64 streaming hash: %s", hash)t.Logf("Expected hash: %s", expectedHash)}
// Helper function to create an empty file
func BenchmarkComputeXXH64_Small(b *testing.B) {f := filepath.Join("..", "audio", "N14-2025-02-25-20241116_054500-685-703.wav") // 547Kb.ResetTimer()for i := 0; i < b.N; i++ {ComputeXXH64(f)}}func BenchmarkComputeXXH64_Medium(b *testing.B) {f := filepath.Join("..", "audio", "20250518_210000.WAV") // 14Mb.ResetTimer()for i := 0; i < b.N; i++ {ComputeXXH64(f)}}func BenchmarkComputeXXH64_Large(b *testing.B) {f := filepath.Join("..", "audio", "E166_BIRD_111211_042726.wav") // 55Mb.ResetTimer()for i := 0; i < b.N; i++ {ComputeXXH64(f)}}
// Buffer pools for reducing GC pressure during batch imports.
var (
	// hashBufferPool hands out pointers to 128 KiB byte slices used as
	// scratch space for hash streaming. A pointer to the slice is pooled
	// (rather than the slice value) so that Get and Put do not allocate when
	// the value is boxed into an interface.
	hashBufferPool = sync.Pool{
		New: func() any {
			scratch := make([]byte, 128<<10)
			return &scratch
		},
	}
)
// hashBufferPool recycles 128 KiB byte slices. Entries are stored as *[]byte
// so that moving them in and out of the pool does not allocate a new slice
// header each time.
var hashBufferPool = sync.Pool{
	New: func() any {
		b := make([]byte, 128<<10)
		return &b
	},
}
// Read entire file as binary datadata, err := os.ReadFile(filepath)if err != nil {return "", fmt.Errorf("failed to read file: %w", err)}// Compute XXH64 hash with default seed=0hashValue := xxhash.Sum64(data)// Format as 16-character lowercase hex with zero-padding// %016x = hex, lowercase, zero-padded to 16 charsreturn fmt.Sprintf("%016x", hashValue), nil}// ComputeXXH64Streaming computes the XXH64 hash of a file using streaming I/O.// This uses constant memory (~128KB buffer) regardless of file size.// Returns the hash as a 16-character lowercase hexadecimal string.func ComputeXXH64Streaming(filepath string) (string, error) {