package tools
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	gonanoid "github.com/matoous/go-nanoid/v2"
	"github.com/modelcontextprotocol/go-sdk/mcp"

	"skraak_mcp/db"
	"skraak_mcp/utils"
)
// ImportAudioFilesInput is the argument payload for the import-audio-files
// MCP tool. The ID fields reference rows in the project database.
type ImportAudioFilesInput struct {
	// FolderPath is the absolute path of the folder to scan for WAV files.
	FolderPath string `json:"folder_path" jsonschema:"required,Absolute path to folder containing WAV files"`
	// DatasetID identifies the target dataset (12-character ID).
	DatasetID string `json:"dataset_id" jsonschema:"required,Dataset ID (12 characters)"`
	// LocationID identifies the recording location (12-character ID).
	LocationID string `json:"location_id" jsonschema:"required,Location ID (12 characters)"`
	// ClusterID identifies the recorder cluster (12-character ID).
	ClusterID string `json:"cluster_id" jsonschema:"required,Cluster ID (12 characters)"`
	// Recursive controls subfolder scanning; nil (omitted) means true.
	Recursive *bool `json:"recursive,omitempty" jsonschema:"Scan subfolders recursively (default: true)"`
}
// ImportAudioFilesOutput is the result payload of the import-audio-files
// MCP tool.
type ImportAudioFilesOutput struct {
	// Summary holds aggregate counts and timing for the run.
	Summary ImportSummary `json:"summary" jsonschema:"Import summary with counts and statistics"`
	// FileIDs lists the IDs of successfully imported files.
	// NOTE(review): as of this review the importer never fills this slice
	// (the insert helper generates IDs but does not return them) — confirm.
	FileIDs []string `json:"file_ids" jsonschema:"List of successfully imported file IDs"`
	// Errors lists per-file failures, present only when at least one occurred.
	Errors []FileImportError `json:"errors,omitempty" jsonschema:"Errors encountered during import (if any)"`
}
// ImportSummary aggregates counts and statistics for one import run.
//
// Fix: SkippedFiles and FailedFiles were declared on one line without a
// separating semicolon, which is a Go syntax error; they are now separate
// field declarations.
type ImportSummary struct {
	// TotalFiles is the number of WAV files found by the folder scan.
	TotalFiles int `json:"total_files"`
	// ImportedFiles is the number of file rows actually inserted.
	ImportedFiles int `json:"imported_files"`
	// SkippedFiles counts files skipped as duplicates (hash already present).
	SkippedFiles int `json:"skipped_files"`
	// FailedFiles counts error records produced during the run.
	FailedFiles int `json:"failed_files"`
	// AudioMothFiles counts files recognized as AudioMoth recordings.
	AudioMothFiles int `json:"audiomoth_files"`
	// TotalDuration is the summed audio duration, in seconds.
	TotalDuration float64 `json:"total_duration_seconds"`
	// ProcessingTime is the wall-clock duration of the run, formatted as a string.
	ProcessingTime string `json:"processing_time"`
}
// FileImportError records one failure during import, tagged with the
// pipeline stage in which it occurred ("parse", "hash", or "insert").
type FileImportError struct {
	// FileName is the base name of the affected file; empty for run-level
	// errors (e.g. database open or commit failures).
	FileName string `json:"file_name"`
	Error    string `json:"error"`
	Stage    string `json:"stage"`
}
// fileData carries all per-file metadata gathered during processing,
// ready for insertion into the database.
type fileData struct {
	FileName       string                 // base name of the WAV file
	FilePath       string                 // full path as scanned
	Hash           string                 // xxh64 content hash, used for duplicate detection
	Duration       float64                // audio duration in seconds (from the WAV header)
	SampleRate     int                    // sample rate from the WAV header
	TimestampLocal time.Time              // recording start, local time (AudioMoth comment or filename)
	IsAudioMoth    bool                   // true when the WAV metadata matched an AudioMoth signature
	MothData       *utils.AudioMothData   // nil unless an AudioMoth comment was parsed successfully
	AstroData      utils.AstronomicalData // solar/civil night and moon phase for the recording window
}
// ImportAudioFiles is the MCP tool handler that imports WAV files from a
// folder: it validates the dataset/location/cluster hierarchy, scans for
// WAV files, extracts per-file metadata, inserts the files into the
// database, and returns a summary of what happened.
//
// NOTE(review): dbPath, used below, is not declared in this function or its
// parameters — presumably a package-level variable defined elsewhere in the
// package; confirm.
func ImportAudioFiles(
	ctx context.Context,
	req *mcp.CallToolRequest,
	input ImportAudioFilesInput,
) (*mcp.CallToolResult, ImportAudioFilesOutput, error) {
	startTime := time.Now()
	var output ImportAudioFilesOutput
	// Subfolder scanning defaults to true when the flag is omitted.
	recursive := true
	if input.Recursive != nil {
		recursive = *input.Recursive
	}
	if err := validateImportInput(input, dbPath); err != nil {
		return nil, output, fmt.Errorf("validation failed: %w", err)
	}
	wavFiles, err := scanWAVFiles(input.FolderPath, recursive)
	if err != nil {
		return nil, output, fmt.Errorf("failed to scan folder: %w", err)
	}
	// No WAV files found: succeed with an empty summary rather than erroring.
	if len(wavFiles) == 0 {
		output = ImportAudioFilesOutput{
			Summary: ImportSummary{
				TotalFiles:     0,
				ProcessingTime: time.Since(startTime).String(),
			},
			FileIDs: []string{},
			Errors:  []FileImportError{},
		}
		return &mcp.CallToolResult{}, output, nil
	}
	// Coordinates and timezone are needed for timestamp adjustment and
	// astronomical calculations during processing.
	locationData, err := getLocationData(dbPath, input.LocationID)
	if err != nil {
		return nil, output, fmt.Errorf("failed to get location data: %w", err)
	}
	filesData, errors := processFiles(wavFiles, locationData)
	importedFiles, skippedFiles, insertErrors := insertFilesIntoDB(
		dbPath,
		filesData,
		input.DatasetID,
		input.ClusterID,
		input.LocationID,
	)
	allErrors := append(errors, insertErrors...)
	// Aggregate statistics over the files that passed processing.
	audiomothCount := 0
	totalDuration := 0.0
	for _, fd := range filesData {
		if fd.IsAudioMoth {
			audiomothCount++
		}
		totalDuration += fd.Duration
	}
	summary := ImportSummary{
		TotalFiles:    len(wavFiles),
		ImportedFiles: importedFiles,
		SkippedFiles:  skippedFiles,
		// FailedFiles counts error records; a single file can contribute more
		// than one record, so this can exceed the number of distinct files.
		FailedFiles:    len(allErrors),
		AudioMothFiles: audiomothCount,
		TotalDuration:  totalDuration,
		ProcessingTime: time.Since(startTime).String(),
	}
	// NOTE(review): fileIDs is allocated but never populated —
	// insertFilesIntoDB generates IDs internally and does not return them,
	// so output.FileIDs is always empty. TODO: confirm whether the generated
	// IDs should be surfaced to the caller.
	fileIDs := make([]string, 0, len(filesData))
	output = ImportAudioFilesOutput{
		Summary: summary,
		FileIDs: fileIDs,
		Errors:  allErrors,
	}
	return &mcp.CallToolResult{}, output, nil
}
// validateImportInput checks that the input folder exists and is a
// directory, and that the dataset -> location -> cluster hierarchy
// referenced by the input is present, active, and consistently linked in
// the database at dbPath. It returns nil when everything checks out.
//
// Fix: no-row detection now uses errors.Is(err, sql.ErrNoRows) instead of
// err == sql.ErrNoRows, since drivers and wrappers may wrap the sentinel.
func validateImportInput(input ImportAudioFilesInput, dbPath string) error {
	info, err := os.Stat(input.FolderPath)
	if err != nil {
		return fmt.Errorf("folder not accessible: %w", err)
	}
	if !info.IsDir() {
		return fmt.Errorf("path is not a directory: %s", input.FolderPath)
	}
	database, err := db.OpenReadOnlyDB(dbPath)
	if err != nil {
		return fmt.Errorf("failed to open database: %w", err)
	}
	defer database.Close()

	// Dataset must exist and be active.
	var datasetExists bool
	err = database.QueryRow("SELECT EXISTS(SELECT 1 FROM dataset WHERE id = ? AND active = true)", input.DatasetID).Scan(&datasetExists)
	if err != nil {
		return fmt.Errorf("failed to query dataset: %w", err)
	}
	if !datasetExists {
		return fmt.Errorf("dataset not found or inactive: %s", input.DatasetID)
	}

	// Location must exist, be active, and belong to the requested dataset.
	var locationDatasetID string
	err = database.QueryRow("SELECT dataset_id FROM location WHERE id = ? AND active = true", input.LocationID).Scan(&locationDatasetID)
	if errors.Is(err, sql.ErrNoRows) {
		return fmt.Errorf("location not found or inactive: %s", input.LocationID)
	}
	if err != nil {
		return fmt.Errorf("failed to query location: %w", err)
	}
	if locationDatasetID != input.DatasetID {
		return fmt.Errorf("location %s does not belong to dataset %s", input.LocationID, input.DatasetID)
	}

	// Cluster must exist, be active, and belong to the requested location.
	var clusterLocationID string
	err = database.QueryRow("SELECT location_id FROM cluster WHERE id = ? AND active = true", input.ClusterID).Scan(&clusterLocationID)
	if errors.Is(err, sql.ErrNoRows) {
		return fmt.Errorf("cluster not found or inactive: %s", input.ClusterID)
	}
	if err != nil {
		return fmt.Errorf("failed to query cluster: %w", err)
	}
	if clusterLocationID != input.LocationID {
		return fmt.Errorf("cluster %s does not belong to location %s", input.ClusterID, input.LocationID)
	}
	return nil
}
// locationData holds the coordinates and timezone of a recording location,
// used for timestamp adjustment and astronomical calculations.
type locationData struct {
	Latitude   float64
	Longitude  float64
	TimezoneID string // timezone identifier, as stored in location.timezone_id
}
// getLocationData loads the latitude, longitude, and timezone identifier of
// one location row from the database at dbPath. It returns an error when the
// database cannot be opened or the row cannot be scanned.
func getLocationData(dbPath, locationID string) (*locationData, error) {
	conn, err := db.OpenReadOnlyDB(dbPath)
	if err != nil {
		return nil, err
	}
	defer conn.Close()

	const query = "SELECT latitude, longitude, timezone_id FROM location WHERE id = ?"
	result := &locationData{}
	if err := conn.QueryRow(query, locationID).Scan(&result.Latitude, &result.Longitude, &result.TimezoneID); err != nil {
		return nil, fmt.Errorf("failed to query location data: %w", err)
	}
	return result, nil
}
func scanWAVFiles(rootPath string, recursive bool) ([]string, error) {
var wavFiles []string
if recursive {
err := filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() && strings.HasPrefix(info.Name(), "Clips_") {
return filepath.SkipDir
}
if !info.IsDir() {
ext := strings.ToLower(filepath.Ext(path))
if ext == ".wav" && info.Size() > 0 {
wavFiles = append(wavFiles, path)
}
}
return nil
})
if err != nil {
return nil, err
}
} else {
entries, err := os.ReadDir(rootPath)
if err != nil {
return nil, err
}
for _, entry := range entries {
if !entry.IsDir() {
name := entry.Name()
ext := strings.ToLower(filepath.Ext(name))
if ext == ".wav" {
path := filepath.Join(rootPath, name)
if info, err := os.Stat(path); err == nil && info.Size() > 0 {
wavFiles = append(wavFiles, path)
}
}
}
}
}
sort.Strings(wavFiles)
return wavFiles, nil
}
// processFiles extracts metadata for every scanned WAV file in three phases:
// header parsing, batch filename-timestamp parsing, then per-file hashing,
// timestamp resolution (AudioMoth comment first, filename as fallback), and
// astronomical calculations. It returns the successfully processed files and
// the per-file errors for the ones that failed.
func processFiles(wavFiles []string, location *locationData) ([]*fileData, []FileImportError) {
	var filesData []*fileData
	var errors []FileImportError
	// wavInfo pairs a path with its parsed WAV header (or the parse error).
	type wavInfo struct {
		path     string
		metadata *utils.WAVMetadata
		err      error
	}
	// Phase 1: parse every WAV header up front.
	wavInfos := make([]wavInfo, len(wavFiles))
	for i, path := range wavFiles {
		metadata, err := utils.ParseWAVHeader(path)
		wavInfos[i] = wavInfo{path: path, metadata: metadata, err: err}
	}
	// Phase 2: collect filenames that appear to embed a timestamp so they
	// can be parsed in a single batch call below. Header-parse failures are
	// recorded here and the files excluded from further processing.
	var filenamesForParsing []string
	var filenameIndices []int // wavInfos index for each collected filename
	for i, info := range wavInfos {
		if info.err != nil {
			errors = append(errors, FileImportError{
				FileName: filepath.Base(info.path),
				Error:    info.err.Error(),
				Stage:    "parse",
			})
			continue
		}
		if utils.HasTimestampFilename(info.path) {
			filenamesForParsing = append(filenamesForParsing, filepath.Base(info.path))
			filenameIndices = append(filenameIndices, i)
		}
	}
	// Maps wavInfos index -> timezone-adjusted timestamp from the filename.
	filenameTimestampMap := make(map[int]time.Time)
	if len(filenamesForParsing) > 0 {
		filenameTimestamps, err := utils.ParseFilenameTimestamps(filenamesForParsing)
		if err != nil {
			// Batch parse failed: record an error for every candidate file.
			// NOTE(review): these files are not excluded from Phase 3, so a
			// file that also lacks an AudioMoth timestamp will get a second
			// "no timestamp available" error — confirm this is intended.
			for _, idx := range filenameIndices {
				errors = append(errors, FileImportError{
					FileName: filepath.Base(wavInfos[idx].path),
					Error:    fmt.Sprintf("filename timestamp parsing failed: %v", err),
					Stage:    "parse",
				})
			}
		} else {
			// Shift the parsed timestamps into the location's timezone.
			adjustedTimestamps, err := utils.ApplyTimezoneOffset(filenameTimestamps, location.TimezoneID)
			if err != nil {
				for _, idx := range filenameIndices {
					errors = append(errors, FileImportError{
						FileName: filepath.Base(wavInfos[idx].path),
						Error:    fmt.Sprintf("timezone offset failed: %v", err),
						Stage:    "parse",
					})
				}
			} else {
				for j, idx := range filenameIndices {
					filenameTimestampMap[idx] = adjustedTimestamps[j]
				}
			}
		}
	}
	// Phase 3: per-file hashing, timestamp resolution, and astronomy.
	for i, info := range wavInfos {
		if info.err != nil {
			continue
		}
		hash, err := utils.ComputeXXH64(info.path)
		if err != nil {
			errors = append(errors, FileImportError{
				FileName: filepath.Base(info.path),
				Error:    fmt.Sprintf("hash calculation failed: %v", err),
				Stage:    "hash",
			})
			continue
		}
		var timestampLocal time.Time
		var isAudioMoth bool
		var mothData *utils.AudioMothData
		// Prefer the timestamp embedded in the AudioMoth comment chunk.
		if utils.IsAudioMoth(info.metadata.Comment, info.metadata.Artist) {
			isAudioMoth = true
			mothData, err = utils.ParseAudioMothComment(info.metadata.Comment)
			if err == nil {
				timestampLocal = mothData.Timestamp
			} else {
				// Record the failure but fall through to the filename fallback.
				errors = append(errors, FileImportError{
					FileName: filepath.Base(info.path),
					Error:    fmt.Sprintf("AudioMoth comment parsing failed: %v", err),
					Stage:    "parse",
				})
			}
		}
		// Fallback: timestamp parsed from the filename (already tz-adjusted).
		if timestampLocal.IsZero() {
			if ts, ok := filenameTimestampMap[i]; ok {
				timestampLocal = ts
			}
		}
		// A file with no resolvable timestamp cannot be imported.
		if timestampLocal.IsZero() {
			errors = append(errors, FileImportError{
				FileName: filepath.Base(info.path),
				Error:    "no timestamp available (not AudioMoth and filename not parseable)",
				Stage:    "parse",
			})
			continue
		}
		astroData := utils.CalculateAstronomicalData(
			timestampLocal.UTC(),
			info.metadata.Duration,
			location.Latitude,
			location.Longitude,
		)
		filesData = append(filesData, &fileData{
			FileName:       filepath.Base(info.path),
			FilePath:       info.path,
			Hash:           hash,
			Duration:       info.metadata.Duration,
			SampleRate:     info.metadata.SampleRate,
			TimestampLocal: timestampLocal,
			IsAudioMoth:    isAudioMoth,
			MothData:       mothData,
			AstroData:      astroData,
		})
	}
	return filesData, errors
}
// insertFilesIntoDB writes the processed files into the database in a single
// transaction: one file row per file, a file_dataset link row, and — for
// AudioMoth recordings — a moth_metadata row. Files whose xxh64 hash already
// exists are counted as skipped. Row-level failures are recorded as errors
// and skip to the next file; they do not abort the transaction.
//
// NOTE(review): the generated file IDs are not returned to the caller, so
// the tool output's FileIDs list stays empty — confirm whether IDs should
// be surfaced from here.
func insertFilesIntoDB(
	dbPath string,
	filesData []*fileData,
	datasetID, clusterID, locationID string,
) (imported, skipped int, errors []FileImportError) {
	database, err := db.OpenWriteableDB(dbPath)
	if err != nil {
		// Empty FileName marks run-level (non-file-specific) errors.
		errors = append(errors, FileImportError{
			FileName: "",
			Error:    fmt.Sprintf("failed to open database: %v", err),
			Stage:    "insert",
		})
		return 0, 0, errors
	}
	defer database.Close()
	ctx := context.Background()
	tx, err := database.BeginTx(ctx, nil)
	if err != nil {
		errors = append(errors, FileImportError{
			FileName: "",
			Error:    fmt.Sprintf("failed to begin transaction: %v", err),
			Stage:    "insert",
		})
		return 0, 0, errors
	}
	// Safety net: rollback on any early return; no-op once Commit succeeds.
	defer tx.Rollback()
	fileStmt, err := tx.PrepareContext(ctx, `
INSERT INTO file (
id, file_name, path, xxh64_hash, location_id, timestamp_local,
cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,
moon_phase, created_at, last_modified, active
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'), true)
`)
	if err != nil {
		errors = append(errors, FileImportError{
			FileName: "",
			Error:    fmt.Sprintf("failed to prepare file statement: %v", err),
			Stage:    "insert",
		})
		return 0, 0, errors
	}
	defer fileStmt.Close()
	datasetStmt, err := tx.PrepareContext(ctx, `
INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)
VALUES (?, ?, datetime('now'), datetime('now'))
`)
	if err != nil {
		errors = append(errors, FileImportError{
			FileName: "",
			Error:    fmt.Sprintf("failed to prepare dataset statement: %v", err),
			Stage:    "insert",
		})
		return 0, 0, errors
	}
	defer datasetStmt.Close()
	mothStmt, err := tx.PrepareContext(ctx, `
INSERT INTO moth_metadata (
file_id, timestamp, recorder_id, gain, battery_v, temp_c,
created_at, last_modified, active
) VALUES (?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'), true)
`)
	if err != nil {
		errors = append(errors, FileImportError{
			FileName: "",
			Error:    fmt.Sprintf("failed to prepare moth statement: %v", err),
			Stage:    "insert",
		})
		return 0, 0, errors
	}
	defer mothStmt.Close()
	for _, fd := range filesData {
		// Duplicate detection by content hash, scoped to the whole file table.
		var exists bool
		err = tx.QueryRowContext(ctx,
			"SELECT EXISTS(SELECT 1 FROM file WHERE xxh64_hash = ?)",
			fd.Hash,
		).Scan(&exists)
		if err != nil {
			errors = append(errors, FileImportError{
				FileName: fd.FileName,
				Error:    fmt.Sprintf("duplicate check failed: %v", err),
				Stage:    "insert",
			})
			continue
		}
		if exists {
			skipped++
			continue
		}
		// 21-character alphanumeric nanoid as the file primary key.
		fileID, err := gonanoid.Generate("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 21)
		if err != nil {
			errors = append(errors, FileImportError{
				FileName: fd.FileName,
				Error:    fmt.Sprintf("ID generation failed: %v", err),
				Stage:    "insert",
			})
			continue
		}
		_, err = fileStmt.ExecContext(ctx,
			fileID, fd.FileName, fd.FilePath, fd.Hash, locationID,
			fd.TimestampLocal, clusterID, fd.Duration, fd.SampleRate,
			fd.AstroData.SolarNight, fd.AstroData.CivilNight, fd.AstroData.MoonPhase,
		)
		if err != nil {
			errors = append(errors, FileImportError{
				FileName: fd.FileName,
				Error:    fmt.Sprintf("file insert failed: %v", err),
				Stage:    "insert",
			})
			continue
		}
		_, err = datasetStmt.ExecContext(ctx, fileID, datasetID)
		if err != nil {
			// NOTE(review): the file row inserted above is kept (and will be
			// committed) even though this link insert failed, and the file is
			// not counted as imported — confirm this is intended.
			errors = append(errors, FileImportError{
				FileName: fd.FileName,
				Error:    fmt.Sprintf("file_dataset insert failed: %v", err),
				Stage:    "insert",
			})
			continue
		}
		if fd.IsAudioMoth && fd.MothData != nil {
			// NOTE(review): the moth fields are passed by pointer, relying on
			// database/sql to dereference them; if these fields are already
			// pointer types this double-indirects — confirm the field types
			// in utils.AudioMothData.
			_, err = mothStmt.ExecContext(ctx,
				fileID,
				fd.MothData.Timestamp,
				&fd.MothData.RecorderID,
				&fd.MothData.Gain,
				&fd.MothData.BatteryV,
				&fd.MothData.TempC,
			)
			if err != nil {
				errors = append(errors, FileImportError{
					FileName: fd.FileName,
					Error:    fmt.Sprintf("moth_metadata insert failed: %v", err),
					Stage:    "insert",
				})
				continue
			}
		}
		imported++
	}
	err = tx.Commit()
	if err != nil {
		// On commit failure nothing was persisted: counts are zeroed, but
		// earlier per-row errors are still returned alongside this one.
		errors = append(errors, FileImportError{
			FileName: "",
			Error:    fmt.Sprintf("transaction commit failed: %v", err),
			Stage:    "insert",
		})
		return 0, 0, errors
	}
	return imported, skipped, errors
}