package spectrogram
import (
"fmt"
"image"
"math"
"os"
"path/filepath"
"strings"
"sync"
"github.com/madelynnblue/go-dsp/window"
"skraak/audio"
"skraak/wav"
)
// Package-level cache of Hann window coefficients, shared by every caller of
// getCachedHannWindow.
var (
	// hannCacheMu guards all reads and writes of hannCache.
	hannCacheMu sync.RWMutex
	// hannCache maps a window length to its precomputed coefficients.
	hannCache = make(map[int][]float64)
)
// getCachedHannWindow returns the Hann window of the given size, computing it
// with window.Hann the first time a size is requested and storing it in
// hannCache for later calls.
//
// The returned slice is the cached one and is shared between callers, so it
// should be treated as read-only.
func getCachedHannWindow(size int) []float64 {
	// Common case: the window already exists, so a shared lock is enough.
	hannCacheMu.RLock()
	cached, found := hannCache[size]
	hannCacheMu.RUnlock()
	if found {
		return cached
	}

	hannCacheMu.Lock()
	defer hannCacheMu.Unlock()
	// Look again under the exclusive lock: another goroutine may have stored
	// the entry between releasing the read lock and acquiring this one.
	cached, found = hannCache[size]
	if !found {
		cached = window.Hann(size)
		hannCache[size] = cached
	}
	return cached
}
// SpectrogramConfig holds the framing parameters used when computing a
// spectrogram.
type SpectrogramConfig struct {
	// WindowSize is the number of samples in each analysis frame.
	WindowSize int
	// HopSize is the number of samples between the starts of consecutive frames.
	HopSize int
	// SampleRate is the sample rate of the audio (presumably in Hz — confirm
	// with callers). GenerateSpectrogram does not read it.
	SampleRate int
}
// DefaultSpectrogramConfig returns the default framing — a 512-sample window
// advanced 256 samples per frame — tagged with the supplied sample rate.
func DefaultSpectrogramConfig(sampleRate int) SpectrogramConfig {
	const (
		defaultWindowSize = 512
		defaultHopSize    = 256
	)
	cfg := SpectrogramConfig{SampleRate: sampleRate}
	cfg.WindowSize = defaultWindowSize
	cfg.HopSize = defaultHopSize
	return cfg
}
// GenerateSpectrogram splits samples into overlapping frames of
// cfg.WindowSize samples spaced cfg.HopSize apart, multiplies each frame by a
// Hann window, obtains its power spectrum from audio.PowerSpectrumFFT, and
// hands the collected values to normalizeFlat.
//
// The result has one row per frequency bin (cfg.WindowSize/2 + 1 rows) and
// one column per frame; normalizeFlat scales the values to 0-255 and reverses
// the row order so the highest bin comes first.
//
// It returns nil when cfg.WindowSize or cfg.HopSize is not positive, or when
// samples holds fewer than cfg.WindowSize values.
func GenerateSpectrogram(samples []float64, cfg SpectrogramConfig) [][]uint8 {
	// Reject unusable framing up front: a zero hop would divide by zero
	// below, and a non-positive window cannot be allocated or transformed.
	if cfg.WindowSize <= 0 || cfg.HopSize <= 0 {
		return nil
	}
	if len(samples) < cfg.WindowSize {
		return nil
	}

	hannWindow := getCachedHannWindow(cfg.WindowSize)

	// With the guards above this is always at least 1.
	numFrames := (len(samples)-cfg.WindowSize)/cfg.HopSize + 1
	numFreqBins := cfg.WindowSize/2 + 1

	// Bin-major layout: the value for (bin, frame) lives at bin*numFrames+frame,
	// which is the rows x cols layout normalizeFlat expects.
	powerFlat := make([]float64, numFreqBins*numFrames)

	// Buffers reused for every frame to avoid per-frame allocations.
	frameData := make([]float64, cfg.WindowSize)
	scratch := make([]complex128, cfg.WindowSize)
	framePower := make([]float64, numFreqBins)

	for frame := 0; frame < numFrames; frame++ {
		start := frame * cfg.HopSize
		for i := 0; i < cfg.WindowSize; i++ {
			frameData[i] = samples[start+i] * hannWindow[i]
		}
		// Presumably fills framePower with the power spectrum of frameData,
		// using scratch as FFT work space — confirm against the audio package.
		audio.PowerSpectrumFFT(frameData, framePower, scratch)
		for bin := 0; bin < numFreqBins; bin++ {
			powerFlat[bin*numFrames+frame] = framePower[bin]
		}
	}
	return normalizeFlat(powerFlat, numFreqBins, numFrames)
}
// convertToDB rewrites power in place as decibels (10*log10 of each entry)
// and reports the smallest and largest resulting values.
//
// Entries that are zero or negative are replaced, before taking the
// logarithm, by the smallest strictly positive entry of the slice, or by
// 1e-20 when no such entry is found. For an empty slice nothing is written
// and the results are math.MaxFloat64 and -math.MaxFloat64 respectively.
func convertToDB(power []float64) (minDB, maxDB float64) {
	// First pass: find the floor used in place of non-positive values.
	floor := math.MaxFloat64
	for i := 0; i < len(power); i++ {
		if p := power[i]; p > 0 && p < floor {
			floor = p
		}
	}
	if floor == math.MaxFloat64 {
		floor = 1e-20
	}

	// Second pass: convert and track the extremes.
	minDB, maxDB = math.MaxFloat64, -math.MaxFloat64
	for i := range power {
		p := power[i]
		if p <= 0 {
			p = floor
		}
		db := 10.0 * math.Log10(p)
		power[i] = db
		// Two independent checks: the first element updates both bounds.
		if db < minDB {
			minDB = db
		}
		if db > maxDB {
			maxDB = db
		}
	}
	return minDB, maxDB
}
// normalizeFlat turns a flat rows x cols matrix of power values (row r starts
// at index r*cols) into rows of 8-bit intensities.
//
// power is first converted to decibels in place by convertToDB; each value is
// then mapped linearly so that the minimum becomes 0 and the maximum about
// 255 (the float-to-uint8 conversion truncates). When every value is equal the
// range is taken as 1, so the whole output is 0. Rows are emitted in reverse
// order: output row 0 is the last input row.
//
// All output rows are slices of a single backing array. It returns nil when
// rows or cols is 0.
func normalizeFlat(power []float64, rows, cols int) [][]uint8 {
	if rows == 0 || cols == 0 {
		return nil
	}

	minDB, maxDB := convertToDB(power)
	span := maxDB - minDB
	if span == 0 {
		span = 1 // avoid dividing by zero for constant input
	}
	scale := 255.0 / span

	backing := make([]uint8, rows*cols)
	out := make([][]uint8, rows)
	for dst := 0; dst < rows; dst++ {
		// Read the mirrored source row so the image is flipped vertically.
		src := power[(rows-1-dst)*cols:]
		row := backing[dst*cols : (dst+1)*cols]
		for j := range row {
			row[j] = uint8((src[j] - minDB) * scale)
		}
		out[dst] = row
	}
	return out
}
// ExtractSegmentSamples returns the part of samples lying between startSec
// and endSec, where a time t maps to sample index int(t * sampleRate).
//
// Both positions are clamped to [0, len(samples)], so a start before the
// beginning or an end past the end (including +Inf or values too large for an
// int) selects up to the respective boundary. It returns nil when the clamped
// range is empty or when either position is NaN.
//
// The result is a sub-slice of samples and shares its backing array.
func ExtractSegmentSamples(samples []float64, sampleRate int, startSec, endSec float64) []float64 {
	rate := float64(sampleRate)
	startPos := startSec * rate
	endPos := endSec * rate
	if math.IsNaN(startPos) || math.IsNaN(endPos) {
		return nil
	}

	// Clamp while still in floating point: converting an out-of-range float
	// to int yields an implementation-defined value, so the conversion must
	// only ever see positions inside [0, len(samples)].
	limit := float64(len(samples))
	clamp := func(pos float64) int {
		switch {
		case pos < 0:
			return 0
		case pos > limit:
			return len(samples)
		}
		return int(pos)
	}

	startIdx, endIdx := clamp(startPos), clamp(endPos)
	if startIdx >= endIdx {
		return nil
	}
	return samples[startIdx:endIdx]
}
func SpectrogramImageFromSamples(samples []float64, sampleRate int, color bool, imgSize int) image.Image {
if len(samples) == 0 {
return nil
}
config := DefaultSpectrogramConfig(sampleRate)
spectrogram := GenerateSpectrogram(samples, config)
if spectrogram == nil {
return nil
}
var img image.Image
if color {
colorData := ApplyL4Colormap(spectrogram)
img = CreateRGBImage(colorData)
} else {
img = CreateGrayscaleImage(spectrogram)
}
if img == nil {
return nil
}
imgSize = ClampImageSize(imgSize)
return ResizeImage(img, imgSize, imgSize)
}
// GenerateSegmentSpectrogram builds a spectrogram image for the part of a WAV
// file between startTime and endTime.
//
// dataFilePath names a ".data" file; the WAV path is obtained by removing
// that suffix. Samples are read with wav.ReadWAVSegmentSamples and, when the
// file's sample rate exceeds audio.DefaultMaxSampleRate, resampled down to
// that rate before SpectrogramImageFromSamples renders them.
//
// A read error is returned unchanged. When the segment holds no samples the
// result is (nil, nil); the image may also be nil with a nil error when
// SpectrogramImageFromSamples produces none.
func GenerateSegmentSpectrogram(dataFilePath string, startTime, endTime float64, color bool, imgSize int) (image.Image, error) {
	audioPath := strings.TrimSuffix(dataFilePath, ".data")

	pcm, rate, err := wav.ReadWAVSegmentSamples(audioPath, startTime, endTime)
	switch {
	case err != nil:
		return nil, err
	case len(pcm) == 0:
		return nil, nil
	}

	// Cap the sample rate before rendering.
	if rate > audio.DefaultMaxSampleRate {
		pcm = audio.ResampleRate(pcm, rate, audio.DefaultMaxSampleRate)
		rate = audio.DefaultMaxSampleRate
	}

	return SpectrogramImageFromSamples(pcm, rate, color, imgSize), nil
}
// WritePNGFile creates the file at path and writes img to it with WritePNG.
//
// The file is created exclusively (mode 0644): if path already exists the
// call fails with a "file already exists" error and the existing file is left
// untouched. If writing or closing fails, the partially written file is
// removed so that it neither lingers as a truncated PNG nor blocks a later
// attempt through the exclusive-create check.
func WritePNGFile(path string, img image.Image) error {
	file, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		if os.IsExist(err) {
			return fmt.Errorf("file already exists: %s", path)
		}
		return fmt.Errorf("failed to create PNG: %w", err)
	}

	if err := WritePNG(img, file); err != nil {
		// The write error is the one worth reporting; close and cleanup are
		// best effort on a file this call created itself.
		_ = file.Close()
		_ = os.Remove(path)
		return fmt.Errorf("failed to write PNG: %w", err)
	}
	if err := file.Close(); err != nil {
		// A failed close may mean the data never reached disk; do not leave
		// a possibly incomplete file behind.
		_ = os.Remove(path)
		return fmt.Errorf("failed to close PNG: %w", err)
	}
	return nil
}
// ClipBaseName builds the extension-less file name for a clip in the form
// "<prefix>_<basename>_<start>_<end>".
//
// The start time is rounded down and the end time rounded up to whole
// numbers, so the span in the name always covers the clip.
func ClipBaseName(prefix, basename string, startTime, endTime float64) string {
	from := int(math.Floor(startTime))
	to := int(math.Ceil(endTime))
	parts := []string{prefix, basename, fmt.Sprint(from), fmt.Sprint(to)}
	return strings.Join(parts, "_")
}
// ClipPaths returns the PNG and WAV paths for a clip inside outputDir. Both
// share the name produced by ClipBaseName and differ only in extension.
//
// If os.Stat succeeds for either path (the PNG is checked first), empty paths
// and a "file already exists" error naming that path are returned. A Stat
// failure of any kind is treated as "not present".
func ClipPaths(outputDir, prefix, basename string, startTime, endTime float64) (pngPath, wavPath string, err error) {
	name := ClipBaseName(prefix, basename, startTime, endTime)
	png := filepath.Join(outputDir, name+".png")
	wave := filepath.Join(outputDir, name+".wav")

	for _, candidate := range [...]string{png, wave} {
		if _, statErr := os.Stat(candidate); statErr == nil {
			return "", "", fmt.Errorf("file already exists: %s", candidate)
		}
	}
	return png, wave, nil
}
// WAVBasename derives the bare recording name from a ".data" file path: it
// removes a trailing ".data", keeps only the last path element, and strips
// that element's extension (e.g. "/x/rec.wav.data" yields "rec").
func WAVBasename(dataFilePath string) string {
	name := filepath.Base(strings.TrimSuffix(dataFilePath, ".data"))
	// filepath.Ext always returns a suffix of name, so slicing it off is
	// equivalent to trimming it.
	if ext := filepath.Ext(name); ext != "" {
		return name[:len(name)-len(ext)]
	}
	return name
}