// ReadWAVSamples reads audio samples from a WAV file and returns them as
// float64 values together with the sample rate in Hz.
//
// Mono files: returns the single channel.
// Multi-channel files: returns the first (left) channel only.
// Samples are normalized to the range -1.0 to 1.0.
//
// Only integer PCM data with 8, 16, 24 or 32 bits per sample is supported;
// any other format tag or bit depth is reported as an error instead of being
// decoded as garbage. The fmt chunk must appear before the data chunk. If the
// data chunk claims more bytes than the file actually holds (for example a
// recording that was cut short), the samples that are present are returned.
func ReadWAVSamples(filepath string) ([]float64, int, error) {
	const (
		formatPCM        = 0x0001 // WAVE_FORMAT_PCM
		formatExtensible = 0xFFFE // WAVE_FORMAT_EXTENSIBLE
		fmtFieldsSize    = 16     // leading bytes of the fmt chunk that are parsed
	)

	file, err := os.Open(filepath)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to open file: %w", err)
	}
	defer file.Close()

	// Read header to get format info
	headerBuf := make([]byte, 44)
	if _, err := io.ReadFull(file, headerBuf); err != nil {
		return nil, 0, fmt.Errorf("failed to read header: %w", err)
	}

	// Verify RIFF/WAVE header
	if string(headerBuf[0:4]) != "RIFF" || string(headerBuf[8:12]) != "WAVE" {
		return nil, 0, fmt.Errorf("not a valid WAV file")
	}

	// Chunks start right after the 12-byte RIFF/WAVE preamble.
	if _, err := file.Seek(12, io.SeekStart); err != nil {
		return nil, 0, fmt.Errorf("failed to seek: %w", err)
	}

	var audioFormat, sampleRate, channels, bitsPerSample int
	var dataSize int64
	foundData := false
	chunkHeader := make([]byte, 8)

	for !foundData {
		if _, err := io.ReadFull(file, chunkHeader); err != nil {
			if err == io.EOF {
				break
			}
			return nil, 0, fmt.Errorf("failed to read chunk header: %w", err)
		}

		chunkID := string(chunkHeader[0:4])
		chunkSize := int64(binary.LittleEndian.Uint32(chunkHeader[4:8]))

		switch chunkID {
		case "data":
			// The file position is now at the first sample byte.
			dataSize = chunkSize
			foundData = true
			continue
		case "fmt ":
			// Only the fixed 16-byte prefix is needed; reading just that avoids
			// allocating whatever size a corrupt header might claim.
			remaining := chunkSize
			if chunkSize >= fmtFieldsSize {
				fmtData := make([]byte, fmtFieldsSize)
				if _, err := io.ReadFull(file, fmtData); err != nil {
					return nil, 0, fmt.Errorf("failed to read fmt chunk: %w", err)
				}
				audioFormat = int(binary.LittleEndian.Uint16(fmtData[0:2]))
				channels = int(binary.LittleEndian.Uint16(fmtData[2:4]))
				sampleRate = int(binary.LittleEndian.Uint32(fmtData[4:8]))
				bitsPerSample = int(binary.LittleEndian.Uint16(fmtData[14:16]))
				remaining -= fmtFieldsSize
			}
			if remaining > 0 {
				if _, err := file.Seek(remaining, io.SeekCurrent); err != nil {
					return nil, 0, fmt.Errorf("failed to read fmt chunk: %w", err)
				}
			}
		default:
			// Skip unknown chunk
			if _, err := file.Seek(chunkSize, io.SeekCurrent); err != nil {
				return nil, 0, fmt.Errorf("failed to skip chunk: %w", err)
			}
		}

		// Chunks are word aligned: odd-sized chunks are followed by a pad byte.
		if chunkSize%2 != 0 {
			if _, err := file.Seek(1, io.SeekCurrent); err != nil {
				return nil, 0, fmt.Errorf("failed to skip padding: %w", err)
			}
		}
	}

	if !foundData {
		return nil, 0, fmt.Errorf("no data chunk found in WAV file")
	}
	if sampleRate == 0 || channels == 0 || bitsPerSample == 0 {
		return nil, 0, fmt.Errorf("missing or invalid fmt chunk")
	}
	// NOTE(review): WAVE_FORMAT_EXTENSIBLE files are assumed to carry integer
	// PCM; the sub-format GUID is not inspected.
	if audioFormat != formatPCM && audioFormat != formatExtensible {
		return nil, 0, fmt.Errorf("unsupported WAV audio format %d: only integer PCM is supported", audioFormat)
	}
	switch bitsPerSample {
	case 8, 16, 24, 32:
	default:
		return nil, 0, fmt.Errorf("unsupported bit depth: %d", bitsPerSample)
	}

	// Clamp the declared data size to the bytes that really exist so that a
	// truncated file or a bogus size field neither fails the read nor forces a
	// huge allocation.
	dataOffset, err := file.Seek(0, io.SeekCurrent)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to seek to data: %w", err)
	}
	info, err := file.Stat()
	if err != nil {
		return nil, 0, fmt.Errorf("failed to read audio data: %w", err)
	}
	available := info.Size() - dataOffset
	if available < 0 {
		available = 0
	}
	if dataSize > available {
		dataSize = available
	}

	// Read audio data
	audioData := make([]byte, dataSize)
	if _, err := io.ReadFull(file, audioData); err != nil {
		return nil, 0, fmt.Errorf("failed to read audio data: %w", err)
	}

	// Convert to float64 samples
	samples := convertToFloat64(audioData, bitsPerSample, channels)
	return samples, sampleRate, nil
}

// convertToFloat64 converts raw little-endian integer PCM bytes to float64
// samples in the range -1.0 to 1.0.
//
// Only the first (left) channel of every frame is decoded. Supported depths
// are 8 (unsigned, centred on 128), 16, 24 and 32 bits (signed). For any other
// depth, or a non-positive channel count, nil is returned. Trailing bytes that
// do not form a complete frame are ignored.
func convertToFloat64(data []byte, bitsPerSample, channels int) []float64 {
	switch bitsPerSample {
	case 8, 16, 24, 32:
	default:
		return nil
	}

	bytesPerSample := bitsPerSample / 8
	blockAlign := bytesPerSample * channels
	if blockAlign <= 0 {
		return nil
	}

	numSamples := len(data) / blockAlign
	samples := make([]float64, numSamples)

	switch bitsPerSample {
	case 8:
		for i := 0; i < numSamples; i++ {
			// 8-bit WAV samples are unsigned with the zero level at 128.
			samples[i] = (float64(data[i*blockAlign]) - 128.0) / 128.0
		}
	case 16:
		for i := 0; i < numSamples; i++ {
			offset := i * blockAlign
			sample := int16(binary.LittleEndian.Uint16(data[offset : offset+2]))
			samples[i] = float64(sample) / 32768.0
		}
	case 24:
		for i := 0; i < numSamples; i++ {
			offset := i * blockAlign
			b := data[offset : offset+3]
			sample := int32(b[0]) | int32(b[1])<<8 | int32(b[2])<<16
			// Sign extend from 24 to 32 bits.
			if sample >= 0x800000 {
				sample -= 0x1000000
			}
			samples[i] = float64(sample) / 8388608.0
		}
	case 32:
		for i := 0; i < numSamples; i++ {
			offset := i * blockAlign
			sample := int32(binary.LittleEndian.Uint32(data[offset : offset+4]))
			samples[i] = float64(sample) / 2147483648.0
		}
	}

	return samples
}
package utilsimport ("math""github.com/madelynnblue/go-dsp/fft""github.com/madelynnblue/go-dsp/window")// SpectrogramConfig holds STFT parameterstype SpectrogramConfig struct {WindowSize int // FFT window size (e.g., 400)HopSize int // Hop between windows (e.g., 200 for 50% overlap)SampleRate int // Sample rate in Hz}// DefaultSpectrogramConfig returns default config matching Julia implementationfunc DefaultSpectrogramConfig(sampleRate int) SpectrogramConfig {return SpectrogramConfig{WindowSize: 400,HopSize: 200, // 50% overlap (window/2)SampleRate: sampleRate,}}// GenerateSpectrogram generates a spectrogram from audio samples.// Returns a 2D array of uint8 (0-255) where:// - First dimension is frequency bins (rows)// - Second dimension is time frames (columns)func GenerateSpectrogram(samples []float64, cfg SpectrogramConfig) [][]uint8 {if len(samples) < cfg.WindowSize {return nil}// Generate Hann windowhannWindow := window.Hann(cfg.WindowSize)// Calculate number of framesnumFrames := (len(samples)-cfg.WindowSize)/cfg.HopSize + 1if numFrames <= 0 {return nil}// Number of frequency bins (half of FFT due to symmetry)numFreqBins := cfg.WindowSize/2 + 1// Allocate power spectrum matrix (freq bins x time frames)powerMatrix := make([][]float64, numFreqBins)for i := range powerMatrix {powerMatrix[i] = make([]float64, numFrames)}// Perform STFTfor frame := 0; frame < numFrames; frame++ {start := frame * cfg.HopSize// Extract and window the frameframeData := make([]float64, cfg.WindowSize)for i := 0; i < cfg.WindowSize; i++ {frameData[i] = samples[start+i] * hannWindow[i]}// Compute FFTfftResult := fft.FFTReal(frameData)// Compute power spectrum (magnitude squared)for bin := 0; bin < numFreqBins; bin++ {re := real(fftResult[bin])im := imag(fftResult[bin])power := re*re + im*impowerMatrix[bin][frame] = power}}// Handle zeros (replace with smallest non-zero value)replaceZeros(powerMatrix)// Convert to dB, normalize, and convert to uint8return normalizeToUint8(powerMatrix)}// 
replaceZeros replaces zero values with the smallest non-zero valuefunc replaceZeros(matrix [][]float64) {// Find smallest non-zero valueminNonZero := math.MaxFloat64for _, row := range matrix {for _, val := range row {if val > 0 && val < minNonZero {minNonZero = val}}}// Replace zerosif minNonZero != math.MaxFloat64 {for i, row := range matrix {for j, val := range row {if val == 0 {matrix[i][j] = minNonZero}}}}}// normalizeToUint8 converts power to dB, normalizes to 0-255func normalizeToUint8(powerMatrix [][]float64) [][]uint8 {rows := len(powerMatrix)if rows == 0 {return nil}cols := len(powerMatrix[0])// Convert to dB and find min/maxdbMatrix := make([][]float64, rows)for i := range dbMatrix {dbMatrix[i] = make([]float64, cols)}minDB := math.MaxFloat64maxDB := -math.MaxFloat64for i, row := range powerMatrix {for j, power := range row {// Power to dB: 10 * log10(power)db := 10.0 * math.Log10(power)dbMatrix[i][j] = dbif db < minDB {minDB = db}if db > maxDB {maxDB = db}}}// Normalize to 0-255result := make([][]uint8, rows)for i := range result {result[i] = make([]uint8, cols)}rangeDB := maxDB - minDBif rangeDB == 0 {rangeDB = 1 // Avoid division by zero}for i, row := range dbMatrix {for j, db := range row {// Shift to non-negative, normalize to 0-1, then to 0-255normalized := (db - minDB) / rangeDBresult[i][j] = uint8(normalized * 255.0)}}// Flip vertically (low frequencies at bottom)flipVertical(result)return result}// flipVertical flips the matrix verticallyfunc flipVertical(matrix [][]uint8) {rows := len(matrix)for i := 0; i < rows/2; i++ {matrix[i], matrix[rows-1-i] = matrix[rows-1-i], matrix[i]}}// ExtractSegmentSamples extracts samples from a time rangefunc ExtractSegmentSamples(samples []float64, sampleRate int, startSec, endSec float64) []float64 {startIdx := int(startSec * float64(sampleRate))endIdx := int(endSec * float64(sampleRate))if startIdx < 0 {startIdx = 0}if endIdx > len(samples) {endIdx = len(samples)}if startIdx >= endIdx {return nil}return 
samples[startIdx:endIdx]}
package utilsimport ("bytes""encoding/base64""fmt""image""image/color""io")// WriteKittyImage writes an image to the writer using the Kitty graphics protocol.// The image is encoded as PNG, base64'd, and sent via Kitty escape sequences.func WriteKittyImage(img image.Image, w io.Writer) error {// Encode to PNGvar pngBuf bytes.Bufferif err := encodePNG(img, &pngBuf); err != nil {return fmt.Errorf("failed to encode PNG: %w", err)}// Base64 encodebase64Data := base64.StdEncoding.EncodeToString(pngBuf.Bytes())// Write Kitty protocol: ESC _ G f=100,a=T;{base64_data} ESC \// f=100 = PNG format// a=T = transmit and displayfmt.Fprintf(w, "\x1b_Gf=100,a=T;%s\x1b\\", base64Data)return nil}// encodePNG encodes an image to PNG formatfunc encodePNG(img image.Image, w io.Writer) error {// Use custom PNG encoder to avoid import cyclesreturn encodeGrayscalePNG(img, w)}// CreateGrayscaleImage creates an image.Image from a 2D uint8 array.// The array is organized as [rows][cols] where rows = frequency bins.func CreateGrayscaleImage(data [][]uint8) image.Image {if len(data) == 0 || len(data[0]) == 0 {return nil}height := len(data)width := len(data[0])img := image.NewGray(image.Rect(0, 0, width, height))for y := 0; y < height; y++ {for x := 0; x < width; x++ {img.SetGray(x, y, color.Gray{Y: data[y][x]})}}return img}// ResizeImage resizes an image using nearest-neighbor interpolation.// For higher quality, use golang.org/x/image/draw, but this keeps dependencies minimal.func ResizeImage(img image.Image, newWidth, newHeight int) image.Image {bounds := img.Bounds()srcWidth := bounds.Dx()srcHeight := bounds.Dy()result := image.NewGray(image.Rect(0, 0, newWidth, newHeight))scaleX := float64(srcWidth) / float64(newWidth)scaleY := float64(srcHeight) / float64(newHeight)for y := 0; y < newHeight; y++ {for x := 0; x < newWidth; x++ {srcX := int(float64(x) * scaleX)srcY := int(float64(y) * scaleY)if srcX >= srcWidth {srcX = srcWidth - 1}if srcY >= srcHeight {srcY = srcHeight - 1}// Get pixel 
colorc := img.At(srcX+bounds.Min.X, srcY+bounds.Min.Y)gray := color.GrayModel.Convert(c).(color.Gray)result.SetGray(x, y, gray)}}return result}// encodeGrayscalePNG encodes a grayscale image to PNG format.// This is a minimal implementation that works for 8-bit grayscale images.func encodeGrayscalePNG(img image.Image, w io.Writer) error {bounds := img.Bounds()width := bounds.Dx()height := bounds.Dy()// PNG signaturesignature := []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}if _, err := w.Write(signature); err != nil {return err}// IHDR chunkihdr := make([]byte, 13)ihdr[0] = byte(width >> 24)ihdr[1] = byte(width >> 16)ihdr[2] = byte(width >> 8)ihdr[3] = byte(width)ihdr[4] = byte(height >> 24)ihdr[5] = byte(height >> 16)ihdr[6] = byte(height >> 8)ihdr[7] = byte(height)ihdr[8] = 8 // bit depthihdr[9] = 0 // color type: grayscaleihdr[10] = 0 // compression: deflateihdr[11] = 0 // filter: adaptiveihdr[12] = 0 // interlace: noneif err := writePNGChunk(w, "IHDR", ihdr); err != nil {return err}// IDAT chunk (image data)rawData := make([]byte, 0, height*(width+1))for y := 0; y < height; y++ {rawData = append(rawData, 0) // filter byte (none)for x := 0; x < width; x++ {c := color.GrayModel.Convert(img.At(x+bounds.Min.X, y+bounds.Min.Y)).(color.Gray)rawData = append(rawData, c.Y)}}compressed := deflateCompress(rawData)if err := writePNGChunk(w, "IDAT", compressed); err != nil {return err}// IEND chunkif err := writePNGChunk(w, "IEND", nil); err != nil {return err}return nil}// writePNGChunk writes a PNG chunk with CRCfunc writePNGChunk(w io.Writer, chunkType string, data []byte) error {// Length (4 bytes, big-endian)length := uint32(len(data))if _, err := w.Write([]byte{byte(length >> 24),byte(length >> 16),byte(length >> 8),byte(length),}); err != nil {return err}// Chunk type (4 bytes)if _, err := w.Write([]byte(chunkType)); err != nil {return err}// Chunk dataif len(data) > 0 {if _, err := w.Write(data); err != nil {return err}}// CRC32 of chunk type + datacrc := 
crc32PNG([]byte(chunkType), data)if _, err := w.Write([]byte{byte(crc >> 24),byte(crc >> 16),byte(crc >> 8),byte(crc),}); err != nil {return err}return nil}// crc32PNG computes CRC32 for PNGfunc crc32PNG(chunkType, data []byte) uint32 {crc := uint32(0xFFFFFFFF)// Process chunk typefor _, b := range chunkType {crc = updateCRC32(crc, b)}// Process datafor _, b := range data {crc = updateCRC32(crc, b)}return crc ^ 0xFFFFFFFF}// updateCRC32 updates CRC with one bytefunc updateCRC32(crc uint32, b byte) uint32 {// CRC32 polynomial table (standard)const poly = 0xEDB88320crc ^= uint32(b)for i := 0; i < 8; i++ {if crc&1 != 0 {crc = (crc >> 1) ^ poly} else {crc >>= 1}}return crc}// deflateCompress performs simple deflate compression// For simplicity, we use uncompressed deflate blocks (not optimal but works)func deflateCompress(data []byte) []byte {result := make([]byte, 0)// Write zlib headerresult = append(result, 0x78, 0x01) // deflate, level 1// Split into uncompressed blocks (max 65535 bytes each)offset := 0for offset < len(data) {blockSize := len(data) - offsetif blockSize > 65535 {blockSize = 65535}// Block header: final=1 if last block, type=00 (uncompressed)isFinal := byte(0)if offset+blockSize >= len(data) {isFinal = 1}// Store uncompressed blockresult = append(result, isFinal) // BFINAL + BTYPE=00len := uint16(blockSize)nlen := ^lenresult = append(result, byte(len), byte(len>>8))result = append(result, byte(nlen), byte(nlen>>8))result = append(result, data[offset:offset+blockSize]...)offset += blockSize}// Add Adler-32 checksumadler := adler32(data)result = append(result, byte(adler>>24), byte(adler>>16), byte(adler>>8), byte(adler))return result}// adler32 computes Adler-32 checksumfunc adler32(data []byte) uint32 {const mod = 65521a, b := uint32(1), uint32(0)for _, c := range data {a = (a + uint32(c)) % modb = (b + a) % mod}return (b << 16) | a}
package toolsimport ("encoding/json""fmt""os""strings""skraak/utils")// CallsShowImagesInput defines the input for the show-images tooltype CallsShowImagesInput struct {DataFilePath string `json:"data_file_path" jsonschema:"required,Path to .data file"`}// CallsShowImagesOutput defines the output for the show-images tooltype CallsShowImagesOutput struct {SegmentsShown int `json:"segments_shown"`WavFile string `json:"wav_file"`Error string `json:"error,omitempty"`}// Segment represents a detection segment in a .data filetype Segment struct {StartTime float64EndTime float64FreqLow float64FreqHigh float64}// CallsShowImages reads a .data file and displays spectrogram images for each segmentfunc CallsShowImages(input CallsShowImagesInput) (CallsShowImagesOutput, error) {var output CallsShowImagesOutput// Validate file existsif _, err := os.Stat(input.DataFilePath); os.IsNotExist(err) {output.Error = fmt.Sprintf("File not found: %s", input.DataFilePath)return output, fmt.Errorf("%s", output.Error)}// Derive WAV file path (strip .data suffix)wavPath := strings.TrimSuffix(input.DataFilePath, ".data")output.WavFile = wavPath// Check WAV file existsif _, err := os.Stat(wavPath); os.IsNotExist(err) {output.Error = fmt.Sprintf("WAV file not found: %s", wavPath)return output, fmt.Errorf("%s", output.Error)}// Parse .data filesegments, err := parseDataFile(input.DataFilePath)if err != nil {output.Error = err.Error()return output, fmt.Errorf("%s", output.Error)}if len(segments) == 0 {output.Error = "No segments found in .data file"return output, fmt.Errorf("%s", output.Error)}// Read WAV samplessamples, sampleRate, err := utils.ReadWAVSamples(wavPath)if err != nil {output.Error = fmt.Sprintf("Failed to read WAV file: %v", err)return output, fmt.Errorf("%s", output.Error)}// Generate spectrogram for each segment and output via Kitty protocolconfig := utils.DefaultSpectrogramConfig(sampleRate)for i, segment := range segments {// Extract samples for this segment's time 
rangesegmentSamples := utils.ExtractSegmentSamples(samples, sampleRate, segment.StartTime, segment.EndTime)if len(segmentSamples) == 0 {continue}// Generate spectrogramspectrogram := utils.GenerateSpectrogram(segmentSamples, config)if spectrogram == nil {continue}// Create imageimg := utils.CreateGrayscaleImage(spectrogram)if img == nil {continue}// Resize to 224x224resized := utils.ResizeImage(img, 224, 224)// Write to stdout via Kitty protocol// Add newline between images for separationif i > 0 {fmt.Println()}if err := utils.WriteKittyImage(resized, os.Stdout); err != nil {output.Error = fmt.Sprintf("Failed to write image: %v", err)return output, fmt.Errorf("%s", output.Error)}fmt.Println() // Newline after image}output.SegmentsShown = len(segments)return output, nil}// parseDataFile parses an AviaNZ .data file and extracts segmentsfunc parseDataFile(path string) ([]Segment, error) {file, err := os.Open(path)if err != nil {return nil, fmt.Errorf("failed to open .data file: %v", err)}defer file.Close()// Parse JSON arrayvar data []interface{}decoder := json.NewDecoder(file)if err := decoder.Decode(&data); err != nil {return nil, fmt.Errorf("failed to parse .data file: %v", err)}// First element is metadata, rest are segmentsif len(data) < 2 {return nil, nil}var segments []Segmentfor i := 1; i < len(data); i++ {seg, ok := data[i].([]interface{})if !ok || len(seg) < 5 {continue}// Parse segment: [start_time, end_time, freq_low, freq_high, labels]startTime, ok1 := seg[0].(float64)endTime, ok2 := seg[1].(float64)freqLow, ok3 := seg[2].(float64)freqHigh, ok4 := seg[3].(float64)if !ok1 || !ok2 || !ok3 || !ok4 {continue}segments = append(segments, Segment{StartTime: startTime,EndTime: endTime,FreqLow: freqLow,FreqHigh: freqHigh,})}return segments, nil}
// runCallsShowImages handles the "calls show-images" subcommandfunc runCallsShowImages(args []string) {fs := flag.NewFlagSet("calls show-images", flag.ExitOnError)filePath := fs.String("file", "", "Path to .data file (required)")fs.Usage = func() {fmt.Fprintf(os.Stderr, "Usage: skraak calls show-images [options]\n\n")fmt.Fprintf(os.Stderr, "Display spectrogram images for each segment in a .data file.\n")fmt.Fprintf(os.Stderr, "Images are output using the Kitty graphics protocol.\n\n")fmt.Fprintf(os.Stderr, "Options:\n")fs.PrintDefaults()fmt.Fprintf(os.Stderr, "\nExamples:\n")fmt.Fprintf(os.Stderr, " skraak calls show-images --file recording.wav.data\n")}if err := fs.Parse(args); err != nil {os.Exit(1)}// Validate required flagsif *filePath == "" {fmt.Fprintf(os.Stderr, "Error: --file is required\n\n")fs.Usage()os.Exit(1)}input := tools.CallsShowImagesInput{DataFilePath: *filePath,}fmt.Fprintf(os.Stderr, "Showing spectrogram images for: %s\n", *filePath)output, err := tools.CallsShowImages(input)if err != nil {fmt.Fprintf(os.Stderr, "Error: %v\n", err)os.Exit(1)}fmt.Fprintf(os.Stderr, "Displayed %d segment(s) from %s\n", output.SegmentsShown, output.WavFile)}