calls_summarise.go
package calls
import (
"sort"
"strings"
"skraak/datafile"
)
// CallsSummariseInput defines the input for the calls-summarise tool.
type CallsSummariseInput struct {
// Folder is the directory handed to datafile.FindDataFiles to locate .data files.
Folder string `json:"folder"`
// Brief, when true, suppresses the per-segment list; only aggregate
// statistics are produced.
Brief bool `json:"brief"`
// Filter, when non-empty, keeps only labels whose filter name equals this
// value exactly, and drops segments left with no labels. An empty value
// keeps every label and every segment.
Filter string `json:"filter,omitempty"`
}
// CallsSummariseOutput defines the output for the calls-summarise tool.
type CallsSummariseOutput struct {
// Segments lists the retained segments, sorted by file and then start time.
// It is left empty in brief mode.
Segments []SegmentSummary `json:"segments"`
// Folder echoes the folder that was summarised.
Folder string `json:"folder"`
// DataFilesRead counts the .data files that parsed successfully.
DataFilesRead int `json:"data_files_read"`
// DataFilesSkipped holds the paths of .data files that failed to parse.
DataFilesSkipped []string `json:"data_files_skipped"`
// TotalSegments is the segment count reported for the summary.
TotalSegments int `json:"total_segments"`
// Filters maps a filter name to the statistics gathered for its labels.
Filters map[string]FilterStats `json:"filters"`
// ReviewStatus aggregates review counters over every counted label.
ReviewStatus ReviewStatus `json:"review_status"`
// Operators is the sorted set of non-empty operator names seen in file metadata.
Operators []string `json:"operators"`
// Reviewers is the sorted set of non-empty reviewer names seen in file metadata.
Reviewers []string `json:"reviewers"`
// Error carries the error message when the data files could not be listed;
// it is omitted from JSON otherwise.
Error *string `json:"error,omitempty"`
}
// SegmentSummary represents a single segment in the output.
type SegmentSummary struct {
// File is the .data file name with its directory and ".data" suffix removed.
File string `json:"file"`
// StartTime and EndTime are copied unchanged from the parsed segment.
// NOTE(review): presumably seconds from the start of the recording - confirm
// against the datafile package.
StartTime float64 `json:"start_time"`
EndTime float64 `json:"end_time"`
// Labels holds the segment's labels after any filter has been applied.
Labels []LabelSummary `json:"labels"`
}
// LabelSummary represents a label in the output. Filter, Certainty and Species
// are always encoded; CallType, Comment and Bookmark are omitted from JSON when
// they hold their zero value.
type LabelSummary struct {
// Filter is the name of the filter the label belongs to.
Filter string `json:"filter"`
// Certainty is copied from the label; the summary treats 100 as confirmed
// and 0 as "don't know".
Certainty int `json:"certainty"`
// Species is the species name recorded on the label.
Species string `json:"species"`
// CallType is the optional call type recorded on the label.
CallType string `json:"calltype,omitempty"`
// Comment is the optional free-text comment recorded on the label.
Comment string `json:"comment,omitempty"`
// Bookmark reports whether the label was bookmarked.
Bookmark bool `json:"bookmark,omitempty"`
}
// FilterStats contains per-filter statistics.
type FilterStats struct {
// Segments is incremented once for every counted label carrying this filter,
// so a segment with two labels from the same filter contributes two.
Segments int `json:"segments"`
// Species maps a species name to the number of labels naming it.
Species map[string]int `json:"species"`
// Calltypes counts labels that have a call type; it is set to nil (and so
// omitted from JSON) when no label of this filter has one.
Calltypes map[string]map[string]int `json:"calltypes,omitempty"` // species -> calltype -> count
}
// ReviewStatus contains review progress statistics. Every counter is a number
// of labels, not of segments. Unreviewed, Confirmed and DontKnow are mutually
// exclusive; the remaining counters are independent of them.
type ReviewStatus struct {
Unreviewed int `json:"unreviewed"` // certainty is neither 0 nor 100
Confirmed int `json:"confirmed"` // certainty = 100
DontKnow int `json:"dont_know"` // certainty = 0
WithCallType int `json:"with_calltype"` // labels with a non-empty call type
WithComments int `json:"with_comments"` // labels with a non-empty comment
Bookmarked int `json:"bookmarked"` // labels flagged as bookmarked
}
// CallsSummarise reads all .data files found for input.Folder and produces a
// summary of their segments and labels.
//
// When the data files cannot be listed, the underlying error is returned and
// its message is also stored in the output's Error field. Files that are
// listed but fail to parse do not abort the run; they are recorded in
// DataFilesSkipped.
//
// Every slice and map in the returned output is non-nil, including on the
// error path, so the JSON encoding never contains null collections.
// TotalSegments counts the segments that contributed to the summary and is the
// same whether or not input.Brief is set.
func CallsSummarise(input CallsSummariseInput) (CallsSummariseOutput, error) {
	// Initialise before anything can fail: a nil slice or map would be
	// encoded as JSON null.
	output := CallsSummariseOutput{
		Segments:         make([]SegmentSummary, 0),
		Folder:           input.Folder,
		DataFilesSkipped: make([]string, 0),
		Filters:          make(map[string]FilterStats),
		Operators:        make([]string, 0),
		Reviewers:        make([]string, 0),
	}

	// Find all .data files.
	filePaths, err := datafile.FindDataFiles(input.Folder)
	if err != nil {
		errMsg := err.Error()
		output.Error = &errMsg
		return output, err
	}
	if len(filePaths) == 0 {
		return output, nil
	}

	// Track unique operators and reviewers across all files.
	operatorSet := make(map[string]bool)
	reviewerSet := make(map[string]bool)

	summariseFiles(filePaths, input, &output, operatorSet, reviewerSet)
	finaliseSummary(&output, operatorSet, reviewerSet, input.Brief)
	return output, nil
}

// summariseFiles processes all data files, populating output stats.
//
// For each file that parses it bumps DataFilesRead, records the operator and
// reviewer from the metadata, and walks the segments. A segment is kept when no
// filter is set, or when at least one of its labels matches input.Filter. Each
// kept segment increments output.TotalSegments, feeds its (filtered) labels
// into the filter and review statistics, and, unless input.Brief is set, is
// appended to output.Segments. Files that fail to parse are appended to
// output.DataFilesSkipped.
func summariseFiles(filePaths []string, input CallsSummariseInput, output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool) {
	for _, path := range filePaths {
		df, err := datafile.ParseDataFile(path)
		if err != nil {
			output.DataFilesSkipped = append(output.DataFilesSkipped, path)
			continue
		}
		output.DataFilesRead++
		trackMeta(df.Meta, operatorSet, reviewerSet)

		// The display name is only needed when segments are listed.
		var relPath string
		if !input.Brief {
			relPath = extractRelativePath(input.Folder, path)
		}

		for _, seg := range df.Segments {
			filteredLabels := filterLabels(seg.Labels, input.Filter)
			if input.Filter != "" && len(filteredLabels) == 0 {
				continue
			}

			// Count the segment itself rather than deriving the total from
			// label counts, so brief and full output agree even when a
			// segment carries several labels or none at all.
			output.TotalSegments++
			updateStatsFromLabels(filteredLabels, output)

			if input.Brief {
				continue
			}
			output.Segments = append(output.Segments, SegmentSummary{
				File:      relPath,
				StartTime: seg.StartTime,
				EndTime:   seg.EndTime,
				Labels:    buildLabelSummaries(filteredLabels),
			})
		}
	}
}
// trackMeta adds the operator and reviewer named in a file's metadata to the
// given sets. Nil metadata and empty names are ignored.
func trackMeta(meta *datafile.DataMeta, operatorSet, reviewerSet map[string]bool) {
	if meta != nil {
		if name := meta.Operator; name != "" {
			operatorSet[name] = true
		}
		if name := meta.Reviewer; name != "" {
			reviewerSet[name] = true
		}
	}
}
// filterLabels selects the labels whose Filter equals filter. An empty filter
// disables selection and hands back the input slice unchanged; when nothing
// matches the result is nil.
func filterLabels(labels []*datafile.Label, filter string) []*datafile.Label {
	var matched []*datafile.Label
	switch filter {
	case "":
		matched = labels
	default:
		for i := range labels {
			if labels[i].Filter != filter {
				continue
			}
			matched = append(matched, labels[i])
		}
	}
	return matched
}
// buildLabelSummaries converts labels to label summaries, preserving order.
//
// The result is never nil: a segment without labels yields an empty slice so
// that it is encoded as "labels": [] rather than "labels": null, in line with
// the other collections in the output.
func buildLabelSummaries(labels []*datafile.Label) []LabelSummary {
	summaries := make([]LabelSummary, 0, len(labels))
	for _, l := range labels {
		// Optional fields are copied unconditionally: their zero values are
		// what an unset field would hold anyway, and omitempty drops them.
		summaries = append(summaries, LabelSummary{
			Filter:    l.Filter,
			Certainty: l.Certainty,
			Species:   l.Species,
			CallType:  l.CallType,
			Comment:   l.Comment,
			Bookmark:  l.Bookmark,
		})
	}
	return summaries
}
// updateStatsFromLabels feeds each label, in order, into the per-filter
// statistics and then into the review status counters held by output.
func updateStatsFromLabels(labels []*datafile.Label, output *CallsSummariseOutput) {
	for i := range labels {
		label := labels[i]
		updateFilterStats(label, output)
		updateReviewStatus(label, output)
	}
}
// updateFilterStats records one label in the statistics of the filter it
// belongs to, creating that filter's entry on first use. It bumps the filter's
// label count and species count and, when the label has a call type, the
// species -> calltype count as well.
func updateFilterStats(l *datafile.Label, output *CallsSummariseOutput) {
	stats, known := output.Filters[l.Filter]
	if !known {
		// First label for this filter: the zero value only lacks its maps.
		stats.Species = map[string]int{}
		stats.Calltypes = map[string]map[string]int{}
	}

	stats.Segments++
	stats.Species[l.Species]++

	if l.CallType != "" {
		byType := stats.Calltypes[l.Species]
		if byType == nil {
			byType = map[string]int{}
			stats.Calltypes[l.Species] = byType
		}
		byType[l.CallType]++
	}

	// FilterStats is stored by value, so the updated copy must be written back.
	output.Filters[l.Filter] = stats
}
// updateReviewStatus adds one label to the review counters. Exactly one of
// Confirmed (certainty 100), DontKnow (certainty 0) or Unreviewed (any other
// certainty) is incremented; the call-type, comment and bookmark counters are
// incremented independently when the label has them.
func updateReviewStatus(l *datafile.Label, output *CallsSummariseOutput) {
	certainty := l.Certainty
	status := &output.ReviewStatus

	if certainty == 100 {
		status.Confirmed++
	} else if certainty == 0 {
		status.DontKnow++
	} else {
		status.Unreviewed++
	}

	if l.CallType != "" {
		status.WithCallType++
	}
	if l.Comment != "" {
		status.WithComments++
	}
	if l.Bookmark {
		status.Bookmarked++
	}
}
// finaliseSummary puts the collected output into its final form: filters
// without any call types get a nil Calltypes map (so the field is omitted from
// JSON), the operator and reviewer sets are appended to the output as sorted
// slices, and, unless brief is set, segments are ordered by file and then by
// start time.
func finaliseSummary(output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool, brief bool) {
	// Drop calltype maps that never received an entry.
	for name, stats := range output.Filters {
		if len(stats.Calltypes) > 0 {
			continue
		}
		stats.Calltypes = nil
		output.Filters[name] = stats
	}

	// Map iteration order is random, so sort after collecting the names.
	for name := range operatorSet {
		output.Operators = append(output.Operators, name)
	}
	sort.Strings(output.Operators)

	for name := range reviewerSet {
		output.Reviewers = append(output.Reviewers, name)
	}
	sort.Strings(output.Reviewers)

	if brief {
		return
	}

	// Order segments by file name, breaking ties on start time.
	segs := output.Segments
	sort.Slice(segs, func(a, b int) bool {
		left, right := &segs[a], &segs[b]
		if left.File == right.File {
			return left.StartTime < right.StartTime
		}
		return left.File < right.File
	})
}
// extractRelativePath extracts the audio filename from a .data file path
// e.g., "/folder/tx51_LISTENING_20260221_203004.WAV.data" -> "tx51_LISTENING_20260221_203004.WAV"
//
// Everything up to and including the last path separator is dropped, then a
// trailing ".data" is removed; the rest of the name, including the case of the
// audio extension, is preserved as-is. Both "/" and "\" are accepted as
// separators so that Windows-style paths do not leak their directory part into
// the result.
//
// The folder argument is currently unused: the result is always the bare file
// name, not a path relative to folder.
func extractRelativePath(folder, dataPath string) string {
	filename := dataPath
	if idx := strings.LastIndexAny(dataPath, `/\`); idx >= 0 {
		filename = dataPath[idx+1:]
	}
	// Remove the .data extension, preserve everything else.
	return strings.TrimSuffix(filename, ".data")
}