mapping.go
// Package mapping provides types and utilities for translating .data file
// species/calltype names to database labels via a mapping JSON file.
//
// This is a leaf package: no imports of skraak/db or skraak/tools.
package mapping
import (
"encoding/json"
"fmt"
"os"
"sort"
"strings"
)
// SpeciesMapping is one entry of the mapping file: it maps a .data species
// name, and optionally its calltype names, to DB labels.
type SpeciesMapping struct {
// Species is the DB species label. It may instead hold one of the sentinel
// values Negative or Ignore, which Classify reports as Neg and Ign.
// Load rejects entries where this field is empty.
Species string `json:"species"`
// Calltypes maps .data calltype names to DB calltype labels. It is optional
// and omitted from JSON when empty; GetDBCalltype passes calltypes without
// an entry through unchanged.
Calltypes map[string]string `json:"calltypes,omitempty"`
}
// File represents the complete mapping file structure.
// Each key is a species name as it appears in .data files; the value holds
// the DB labels for that species and its calltypes.
type File map[string]SpeciesMapping
// Load reads the mapping JSON file at path and decodes it into a File.
//
// It returns an error if the file cannot be read, if its content is not valid
// JSON for a File, if it contains no entries, or if any entry has an empty
// species field. On error the returned File is nil.
//
// When several entries have an empty species field, the error names the
// lexicographically smallest key, so the message is identical on every run
// instead of depending on map iteration order.
func Load(path string) (File, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("failed to read mapping file: %w", err)
	}

	var m File
	if err := json.Unmarshal(data, &m); err != nil {
		return nil, fmt.Errorf("failed to parse mapping JSON: %w", err)
	}

	// A JSON "null" document decodes to a nil map and is rejected here too.
	if len(m) == 0 {
		return nil, fmt.Errorf("mapping file is empty")
	}

	// Scan every entry and remember the smallest offending key rather than
	// returning on the first hit: range order over a map is randomized.
	badKey, foundBad := "", false
	for dataSpecies, sm := range m {
		if sm.Species != "" {
			continue
		}
		if !foundBad || dataSpecies < badKey {
			badKey, foundBad = dataSpecies, true
		}
	}
	if foundBad {
		return nil, fmt.Errorf("mapping entry '%s' has empty species field", badKey)
	}

	return m, nil
}
// Mapping sentinels: special values for the SpeciesMapping.Species field.
// Classify reports them as Neg and Ign respectively, and Classes leaves both
// out of the class list.
//
// Negative marks a .data species as "confirmed empty" (Noise-equivalent):
// segments matching this name are treated as negative evidence — clips overlapping
// them emit an all-zero row when no positive species also overlaps.
//
// Ignore marks a .data species as "ignored entirely": segments matching
// this name neither label clips nor block them.
const (
// Negative is the Species value that marks a .data species as confirmed empty.
Negative = "__NEGATIVE__"
// Ignore is the Species value that marks a .data species as ignored entirely.
Ignore = "__IGNORE__"
)
// Kind describes how a .data species should be treated.
// It is the second result of File.Classify.
type Kind int
const (
// Real is a species mapped to an ordinary canonical class name. As the zero
// value it is also what Classify returns, with ok == false, for a species
// that is not in the mapping.
Real Kind = iota
// Neg is a species whose mapping is the Negative sentinel.
Neg
// Ign is a species whose mapping is the Ignore sentinel.
Ign
)
// Classify looks up dataSpecies in the mapping and reports how it should be
// treated.
//
// ok is false when dataSpecies has no entry; canonical is then empty and
// kind is Real. For an entry holding the Negative or Ignore sentinel, kind is
// Neg or Ign and canonical is empty. For any other entry, canonical is the
// mapped species label and kind is Real.
func (m File) Classify(dataSpecies string) (canonical string, kind Kind, ok bool) {
	entry, found := m[dataSpecies]
	if !found {
		return "", Real, false
	}
	if entry.Species == Negative {
		return "", Neg, true
	}
	if entry.Species == Ignore {
		return "", Ign, true
	}
	return entry.Species, Real, true
}
// ValidateCoversSpecies reports which species in speciesSet have no entry in
// the mapping. The result is sorted and never nil; an empty result means the
// mapping covers every species.
//
// Only the keys of speciesSet are consulted: a key counts as a member
// whatever boolean value it maps to.
func (m File) ValidateCoversSpecies(speciesSet map[string]bool) []string {
	uncovered := []string{}
	for name := range speciesSet {
		if _, mapped := m[name]; mapped {
			continue
		}
		uncovered = append(uncovered, name)
	}
	sort.Strings(uncovered)
	return uncovered
}
// Classes returns the distinct canonical class names found in the mapping,
// sorted. Entries whose species is empty or one of the Negative / Ignore
// sentinels contribute nothing. The result is never nil.
// Used to build the CSV column header for clip-labels.
func (m File) Classes() []string {
	// First pass: de-duplicate, since several .data names may share one label.
	seen := make(map[string]bool)
	for _, entry := range m {
		name := entry.Species
		if name == "" || name == Negative || name == Ignore {
			continue
		}
		seen[name] = true
	}

	// Second pass: flatten to a slice and put it in a stable order.
	classes := make([]string, 0, len(seen))
	for name := range seen {
		classes = append(classes, name)
	}
	sort.Strings(classes)
	return classes
}
// GetDBSpecies looks up the DB species label mapped to a .data species.
// The boolean is false, and the label empty, when dataSpecies has no entry.
// The stored value is returned as-is, so sentinel values such as Negative or
// Ignore are not translated.
func (m File) GetDBSpecies(dataSpecies string) (string, bool) {
	if entry, found := m[dataSpecies]; found {
		return entry.Species, true
	}
	return "", false
}
// GetDBCalltype translates a .data calltype of the given .data species into
// its DB calltype label. When the species has no entry, has no calltype map,
// or the map has no entry for dataCalltype, dataCalltype is returned
// unchanged.
func (m File) GetDBCalltype(dataSpecies, dataCalltype string) string {
	// A missing species yields a zero SpeciesMapping whose Calltypes map is
	// nil, and reading from a nil map is safe, so one lookup covers all the
	// "no mapping" cases.
	if mapped, found := m[dataSpecies].Calltypes[dataCalltype]; found {
		return mapped
	}
	return dataCalltype
}
// ValidationResult contains validation errors for a mapping, grouped by kind.
// The zero value reports no errors. HasErrors tells whether anything was
// recorded, and Error renders the recorded problems as a single message.
type ValidationResult struct {
MissingSpecies []string // .data species not in mapping
MissingDBSpecies []string // mapped species not in DB
MissingCalltypes map[string]string // "dataSpecies/dataCalltype" -> "dbSpecies/dbCalltype"
}
// HasErrors reports whether the result records at least one problem in any
// of its three categories.
func (r ValidationResult) HasErrors() bool {
	total := len(r.MissingSpecies) + len(r.MissingDBSpecies) + len(r.MissingCalltypes)
	return total != 0
}
// Error renders the recorded problems as one line. Each non-empty category
// becomes a section; sections appear in the order missing species, missing DB
// species, missing calltypes, and are joined with "; ". Calltype pairs are
// sorted so the output is stable. An empty result yields the empty string.
func (r ValidationResult) Error() string {
	var sections []string

	if names := r.MissingSpecies; len(names) > 0 {
		joined := strings.Join(names, ", ")
		sections = append(sections, fmt.Sprintf("species in .data but not in mapping: [%s]", joined))
	}

	if names := r.MissingDBSpecies; len(names) > 0 {
		joined := strings.Join(names, ", ")
		sections = append(sections, fmt.Sprintf("mapped species not found in DB: [%s]", joined))
	}

	if len(r.MissingCalltypes) > 0 {
		var pairs []string
		for from, to := range r.MissingCalltypes {
			pairs = append(pairs, fmt.Sprintf("%s->%s", from, to))
		}
		// Map iteration order is random; sort to keep the message stable.
		sort.Strings(pairs)
		joined := strings.Join(pairs, ", ")
		sections = append(sections, fmt.Sprintf("calltypes not found in DB: [%s]", joined))
	}

	return strings.Join(sections, "; ")
}