quietlight / skraakCLI

{/} code [~] changes [>] discussions [*] jobs
calls_summarise.go
package calls

import (
	"sort"
	"strings"

	"skraak/datafile"
)

// CallsSummariseInput defines the input for the calls-summarise tool.
//
// Folder is the directory handed to datafile.FindDataFiles to locate the
// .data files. Brief, when true, suppresses the per-segment entries so only
// the aggregate statistics are produced. Filter, when non-empty, keeps only
// labels whose filter name equals it exactly; segments left with no matching
// label are skipped.
type CallsSummariseInput struct {
	Folder string `json:"folder"`
	Brief  bool   `json:"brief"`
	Filter string `json:"filter,omitempty"`
}

// CallsSummariseOutput defines the output for the calls-summarise tool.
//
// Segments holds one entry per summarised segment and stays empty in brief
// mode. Folder echoes the input folder. DataFilesRead counts the files that
// parsed successfully, while DataFilesSkipped lists the paths of the files
// that failed to parse. TotalSegments is the overall segment count computed
// by CallsSummarise. Filters maps a filter name to its statistics and
// ReviewStatus aggregates review progress over all summarised labels.
// Operators and Reviewers are the sorted, de-duplicated, non-empty values
// found in the metadata of the files read. Error carries the error text when
// the data files could not be listed and is omitted from JSON otherwise.
type CallsSummariseOutput struct {
	Segments         []SegmentSummary       `json:"segments"`
	Folder           string                 `json:"folder"`
	DataFilesRead    int                    `json:"data_files_read"`
	DataFilesSkipped []string               `json:"data_files_skipped"`
	TotalSegments    int                    `json:"total_segments"`
	Filters          map[string]FilterStats `json:"filters"`
	ReviewStatus     ReviewStatus           `json:"review_status"`
	Operators        []string               `json:"operators"`
	Reviewers        []string               `json:"reviewers"`
	Error            *string                `json:"error,omitempty"`
}

// SegmentSummary represents a single segment in the output.
//
// File is the name of the .data file the segment came from with the ".data"
// suffix removed. StartTime and EndTime are copied unchanged from the parsed
// segment. Labels holds the segment's labels after the optional filter has
// been applied.
type SegmentSummary struct {
	File      string         `json:"file"`
	StartTime float64        `json:"start_time"`
	EndTime   float64        `json:"end_time"`
	Labels    []LabelSummary `json:"labels"`
}

// LabelSummary represents a label in the output.
//
// Filter, Certainty and Species are always emitted. CallType and Comment are
// left out of the JSON when empty, and Bookmark is left out when false.
type LabelSummary struct {
	Filter    string `json:"filter"`
	Certainty int    `json:"certainty"`
	Species   string `json:"species"`
	CallType  string `json:"calltype,omitempty"`
	Comment   string `json:"comment,omitempty"`
	Bookmark  bool   `json:"bookmark,omitempty"`
}

// FilterStats contains per-filter statistics.
//
// Segments is incremented once for every summarised label that carries this
// filter, so a segment with several labels from the same filter is counted
// several times. Species counts those labels per species. Calltypes counts
// only the labels that have a non-empty call type; it is reset to nil, and
// therefore omitted from JSON, when no such label was seen.
type FilterStats struct {
	Segments  int                       `json:"segments"`
	Species   map[string]int            `json:"species"`
	Calltypes map[string]map[string]int `json:"calltypes,omitempty"` // species -> calltype -> count
}

// ReviewStatus contains review progress statistics.
//
// Every counter is incremented once per summarised label. Each label lands in
// exactly one of Unreviewed, Confirmed or DontKnow depending on its
// certainty. WithCallType, WithComments and Bookmarked additionally count the
// labels that have a non-empty call type, a non-empty comment, or the
// bookmark flag set.
type ReviewStatus struct {
	Unreviewed   int `json:"unreviewed"` // certainty other than 0 or 100
	Confirmed    int `json:"confirmed"`  // certainty = 100
	DontKnow     int `json:"dont_know"`  // certainty = 0
	WithCallType int `json:"with_calltype"`
	WithComments int `json:"with_comments"`
	Bookmarked   int `json:"bookmarked"`
}

// CallsSummarise reads all .data files in a folder and produces a summary.
//
// The files are located with datafile.FindDataFiles. If that fails, the error
// is returned and its text is also stored in the output's Error field; the
// output's slices and map are still initialised so the result encodes as
// well-formed JSON. Files that cannot be parsed are not fatal: their paths
// are collected in DataFilesSkipped.
//
// TotalSegments is the number of segments that were summarised (after the
// optional filter has been applied) and is the same in brief and non-brief
// mode.
func CallsSummarise(input CallsSummariseInput) (CallsSummariseOutput, error) {
	// Initialise slices/maps before anything can fail so that every return
	// path, including the error path, encodes them as [] / {} rather than
	// null in JSON.
	output := CallsSummariseOutput{
		Segments:         make([]SegmentSummary, 0),
		Folder:           input.Folder,
		DataFilesSkipped: make([]string, 0),
		Filters:          make(map[string]FilterStats),
		Operators:        make([]string, 0),
		Reviewers:        make([]string, 0),
	}

	// Find all .data files
	filePaths, err := datafile.FindDataFiles(input.Folder)
	if err != nil {
		errMsg := err.Error()
		output.Error = &errMsg
		return output, err
	}

	if len(filePaths) == 0 {
		return output, nil
	}

	// Track unique operators and reviewers
	operatorSet := make(map[string]bool)
	reviewerSet := make(map[string]bool)

	summariseFiles(filePaths, input, &output, operatorSet, reviewerSet)
	finaliseSummary(&output, operatorSet, reviewerSet, input.Brief)

	return output, nil
}

// summariseFiles processes all data files, populating output stats.
//
// For every file that parses it bumps DataFilesRead, records the file's
// operator/reviewer, and walks its segments. A segment is skipped when a
// filter is set and none of its labels match. Every segment that is kept
// increments TotalSegments and feeds its (filtered) labels into the filter
// and review statistics; outside brief mode it is also appended to
// output.Segments. Files that fail to parse are listed in DataFilesSkipped.
func summariseFiles(filePaths []string, input CallsSummariseInput, output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool) {
	for _, path := range filePaths {
		df, err := datafile.ParseDataFile(path)
		if err != nil {
			output.DataFilesSkipped = append(output.DataFilesSkipped, path)
			continue
		}

		output.DataFilesRead++
		trackMeta(df.Meta, operatorSet, reviewerSet)

		// The display name is only needed when per-segment entries are emitted.
		var relPath string
		if !input.Brief {
			relPath = extractRelativePath(input.Folder, path)
		}

		for _, seg := range df.Segments {
			filteredLabels := filterLabels(seg.Labels, input.Filter)
			if input.Filter != "" && len(filteredLabels) == 0 {
				continue
			}

			// Count the segment itself rather than deriving the total from
			// per-label counters, which would over-count segments with
			// several labels and miss segments with none.
			output.TotalSegments++
			updateStatsFromLabels(filteredLabels, output)

			if !input.Brief {
				output.Segments = append(output.Segments, SegmentSummary{
					File:      relPath,
					StartTime: seg.StartTime,
					EndTime:   seg.EndTime,
					Labels:    buildLabelSummaries(filteredLabels),
				})
			}
		}
	}
}

// trackMeta adds the file's operator and reviewer to the given sets.
// A nil meta is ignored, as are empty operator or reviewer names.
func trackMeta(meta *datafile.DataMeta, operatorSet, reviewerSet map[string]bool) {
	if meta != nil {
		if name := meta.Operator; name != "" {
			operatorSet[name] = true
		}
		if name := meta.Reviewer; name != "" {
			reviewerSet[name] = true
		}
	}
}

// filterLabels selects the labels whose filter name equals filter.
// An empty filter disables filtering and the input slice is returned as-is.
// When a filter is given and nothing matches, the result is a nil slice.
func filterLabels(labels []*datafile.Label, filter string) []*datafile.Label {
	if filter != "" {
		var matched []*datafile.Label
		for i := range labels {
			if labels[i].Filter != filter {
				continue
			}
			matched = append(matched, labels[i])
		}
		return matched
	}
	return labels
}

// buildLabelSummaries converts labels to label summaries, preserving order.
//
// The result is always a non-nil slice, even when labels is empty, so that a
// segment without labels encodes its "labels" field as [] rather than null
// in JSON. Optional fields (call type, comment, bookmark) are copied as-is;
// their zero values are dropped at encoding time by the omitempty tags on
// LabelSummary.
func buildLabelSummaries(labels []*datafile.Label) []LabelSummary {
	summaries := make([]LabelSummary, 0, len(labels))
	for _, l := range labels {
		summaries = append(summaries, LabelSummary{
			Filter:    l.Filter,
			Certainty: l.Certainty,
			Species:   l.Species,
			CallType:  l.CallType,
			Comment:   l.Comment,
			Bookmark:  l.Bookmark,
		})
	}
	return summaries
}

// updateStatsFromLabels feeds every label, in order, into the per-filter
// statistics and then into the review status counters of output.
func updateStatsFromLabels(labels []*datafile.Label, output *CallsSummariseOutput) {
	for i := range labels {
		label := labels[i]
		updateFilterStats(label, output)
		updateReviewStatus(label, output)
	}
}

// updateFilterStats records one label in the statistics of its filter,
// creating the filter's entry on first sight. The label always bumps the
// filter's segment and species counters; the species -> calltype counter is
// only touched when the label has a call type.
func updateFilterStats(l *datafile.Label, output *CallsSummariseOutput) {
	stats, found := output.Filters[l.Filter]
	if !found {
		// First label for this filter: start from zero with fresh maps.
		stats.Species = make(map[string]int)
		stats.Calltypes = make(map[string]map[string]int)
	}

	stats.Segments++
	stats.Species[l.Species]++

	if callType := l.CallType; callType != "" {
		perSpecies := stats.Calltypes[l.Species]
		if perSpecies == nil {
			perSpecies = make(map[string]int)
			stats.Calltypes[l.Species] = perSpecies
		}
		perSpecies[callType]++
	}

	// FilterStats is stored by value, so the updated copy must be written back.
	output.Filters[l.Filter] = stats
}

// updateReviewStatus records one label in the review status counters.
// Certainty 100 counts as confirmed, 0 as "don't know", and any other value
// as unreviewed. Call type, comment and bookmark are counted independently.
func updateReviewStatus(l *datafile.Label, output *CallsSummariseOutput) {
	status := &output.ReviewStatus

	switch certainty := l.Certainty; {
	case certainty == 100:
		status.Confirmed++
	case certainty == 0:
		status.DontKnow++
	default:
		status.Unreviewed++
	}

	if l.CallType != "" {
		status.WithCallType++
	}
	if l.Comment != "" {
		status.WithComments++
	}
	if l.Bookmark {
		status.Bookmarked++
	}
}

// finaliseSummary puts the output into its final, presentable form:
// empty calltype maps are set to nil so they are omitted from JSON, the
// operator and reviewer sets are appended to the output slices and sorted,
// and, unless brief is set, segments are ordered by file and then start time.
func finaliseSummary(output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool, brief bool) {
	// Drop calltype maps that never received an entry.
	for name, stats := range output.Filters {
		if len(stats.Calltypes) != 0 {
			continue
		}
		stats.Calltypes = nil
		output.Filters[name] = stats
	}

	// Flatten the sets; map iteration order is random, so sort afterwards.
	for name := range operatorSet {
		output.Operators = append(output.Operators, name)
	}
	sort.Strings(output.Operators)

	for name := range reviewerSet {
		output.Reviewers = append(output.Reviewers, name)
	}
	sort.Strings(output.Reviewers)

	if brief {
		return
	}

	// Order segments by file name first, start time second.
	segs := output.Segments
	sort.Slice(segs, func(a, b int) bool {
		left, right := segs[a], segs[b]
		if left.File == right.File {
			return left.StartTime < right.StartTime
		}
		return left.File < right.File
	})
}

// extractRelativePath returns the audio file name for a .data file path:
// everything after the last "/" with a trailing ".data" removed.
// e.g., "/folder/tx51_LISTENING_20260221_203004.WAV.data" -> "tx51_LISTENING_20260221_203004.WAV"
// The rest of the name, including the case of the audio extension, is kept
// unchanged. Only "/" is treated as a path separator, and the folder
// argument is not used.
func extractRelativePath(folder, dataPath string) string {
	// LastIndex yields -1 when there is no "/", so +1 selects the whole string.
	base := dataPath[strings.LastIndex(dataPath, "/")+1:]
	return strings.TrimSuffix(base, ".data")
}
Fork channel

Rename channel

Delete channel