CREATE TYPE dataset_type AS ENUM ('organise', 'test', 'train');
CREATE TABLE dataset (
id VARCHAR(12) PRIMARY KEY, name VARCHAR(255) NOT NULL,
description VARCHAR(255),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE,
type dataset_type NOT NULL DEFAULT 'organise'
);
CREATE INDEX idx_dataset_name ON dataset(name);
CREATE INDEX idx_dataset_active ON dataset(active);
CREATE INDEX idx_dataset_public ON dataset(public);
CREATE TABLE location (
id VARCHAR(12) PRIMARY KEY, dataset_id VARCHAR(12) NOT NULL, name VARCHAR(140) NOT NULL,
latitude DECIMAL(10, 7) NOT NULL CHECK (latitude BETWEEN -90.0 AND 90.0), longitude DECIMAL(10, 7) NOT NULL CHECK (longitude BETWEEN -180.0 AND 180.0), description VARCHAR(255), created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE,
timezone_id VARCHAR(40) NOT NULL, FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
CREATE INDEX idx_location_name ON location(name);
CREATE INDEX idx_location_dataset ON location(dataset_id); CREATE INDEX idx_location_active ON location(active); CREATE INDEX idx_location_dataset_active ON location(dataset_id, active);
CREATE TABLE cyclic_recording_pattern (
id VARCHAR(12) PRIMARY KEY, record_s INTEGER NOT NULL,
sleep_s INTEGER NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE
);
CREATE INDEX idx_cyclic_recording_pattern_active ON cyclic_recording_pattern(active);
CREATE TABLE cluster (
id VARCHAR(12) PRIMARY KEY, dataset_id VARCHAR(12) NOT NULL, location_id VARCHAR(12) NOT NULL, name VARCHAR(140) NOT NULL,
description VARCHAR(255), created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE,
cyclic_recording_pattern_id VARCHAR(12),
sample_rate INTEGER NOT NULL,
FOREIGN KEY (dataset_id) REFERENCES dataset(id),
FOREIGN KEY (location_id) REFERENCES location(id),
FOREIGN KEY (cyclic_recording_pattern_id) REFERENCES cyclic_recording_pattern(id)
);
CREATE INDEX idx_cluster_dataset ON cluster(dataset_id);
CREATE INDEX idx_cluster_active ON cluster(active);
CREATE INDEX idx_cluster_dataset_active ON cluster(dataset_id, active);
CREATE INDEX idx_cluster_recording_pattern ON cluster(cyclic_recording_pattern_id);
CREATE INDEX idx_cluster_location_id ON cluster(location_id);
CREATE TYPE gain_level AS ENUM ('low', 'low-medium', 'medium', 'medium-high', 'high');
CREATE TABLE file (
id VARCHAR(21) PRIMARY KEY, file_name VARCHAR(255) NOT NULL,
path VARCHAR(255) NULL, xxh64_hash VARCHAR(16) NOT NULL, location_id VARCHAR(12) NOT NULL, timestamp_local TIMESTAMP WITH TIME ZONE NOT NULL, cluster_id VARCHAR(12), duration DECIMAL(7, 3) NOT NULL CHECK (duration > 0), sample_rate INTEGER NOT NULL,
description VARCHAR(255), maybe_solar_night BOOLEAN, maybe_civil_night BOOLEAN, moon_phase DECIMAL(3,2) CHECK (moon_phase BETWEEN 0.00 AND 1.00), created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (location_id) REFERENCES location(id),
FOREIGN KEY (cluster_id) REFERENCES cluster(id)
);
CREATE INDEX idx_file_location ON file(location_id);
CREATE INDEX idx_file_active ON file(active);
CREATE INDEX idx_file_timestamp_local ON file(timestamp_local);
CREATE INDEX idx_file_cluster ON file(cluster_id);
CREATE INDEX idx_file_maybe_solar_night ON file(maybe_solar_night);
CREATE INDEX idx_file_maybe_civil_night ON file(maybe_civil_night);
CREATE TABLE moth_metadata (
file_id VARCHAR(21) PRIMARY KEY,
timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
recorder_id VARCHAR(16), gain gain_level NULL, battery_v DECIMAL(2, 1) CHECK (battery_v >= 0), temp_c DECIMAL(3, 1), created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (file_id) REFERENCES file(id)
);
CREATE INDEX idx_moth_metadata_active ON moth_metadata(active);
CREATE TABLE file_metadata (
file_id VARCHAR(21) PRIMARY KEY,
json JSON, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (file_id) REFERENCES file(id)
);
CREATE INDEX idx_file_metadata_active ON file_metadata(active);
CREATE TABLE file_dataset (
file_id VARCHAR(21) NOT NULL,
dataset_id VARCHAR(12) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (file_id, dataset_id),
FOREIGN KEY (file_id) REFERENCES file(id),
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
CREATE INDEX idx_file_dataset_file ON file_dataset(file_id);
CREATE INDEX idx_file_dataset_dataset ON file_dataset(dataset_id);
CREATE TABLE selection(
id VARCHAR(21) PRIMARY KEY, file_id VARCHAR(21) NOT NULL, dataset_id VARCHAR(12) NOT NULL, start_time DECIMAL(7,3) NOT NULL, end_time DECIMAL(7,3) NOT NULL, freq_low DECIMAL(9,3) CHECK (freq_low < 300000), freq_high DECIMAL(9,3) CHECK (freq_high < 300000), description VARCHAR(255), created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (file_id) REFERENCES file(id),
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
CREATE INDEX idx_selection_file ON selection(file_id);
CREATE INDEX idx_selection_dataset ON selection(dataset_id);
CREATE INDEX idx_selection_active ON selection(active);
CREATE TABLE selection_metadata (
selection_id VARCHAR(21) PRIMARY KEY,
json JSON, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (selection_id) REFERENCES selection(id)
);
CREATE INDEX idx_selection_metadata_active ON selection_metadata(active);
CREATE TABLE ebird_taxonomy (
id VARCHAR(12) PRIMARY KEY,
taxonomy_version VARCHAR(4) NOT NULL,
taxon_order INTEGER NOT NULL,
category VARCHAR(15) NOT NULL,
species_code VARCHAR(15) NOT NULL,
taxon_concept_id VARCHAR(15),
primary_com_name VARCHAR(100) NOT NULL,
sci_name VARCHAR(100) NOT NULL,
bird_order VARCHAR(30),
family VARCHAR(100),
species_group VARCHAR(100),
report_as VARCHAR(15),
valid_from DATE NOT NULL,
valid_to DATE,
UNIQUE (species_code, taxonomy_version)
);
CREATE TABLE species (
id VARCHAR(12) PRIMARY KEY, label VARCHAR(100) NOT NULL, ebird_code VARCHAR(12), taxonomy_version VARCHAR(4),
description VARCHAR(255),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (ebird_code, taxonomy_version) REFERENCES ebird_taxonomy(species_code, taxonomy_version)
);
CREATE INDEX idx_species_label ON species(label);
CREATE INDEX idx_species_ebird ON species(ebird_code);
CREATE TABLE call_type (
id VARCHAR(12) PRIMARY KEY, species_id VARCHAR(12) NOT NULL, label VARCHAR(100) NOT NULL, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (species_id) REFERENCES species(id)
);
CREATE INDEX idx_call_type_species ON call_type(species_id);
CREATE INDEX idx_call_type_label ON call_type(label);
CREATE TABLE filter (
id VARCHAR(12) PRIMARY KEY, name VARCHAR(140) NOT NULL,
description VARCHAR(255),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN NOT NULL DEFAULT true
);
CREATE TABLE label (
id VARCHAR(21) PRIMARY KEY, selection_id VARCHAR(21) NOT NULL, species_id VARCHAR(12) NOT NULL, filter_id VARCHAR(12),
certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN NOT NULL DEFAULT true,
FOREIGN KEY (selection_id) REFERENCES selection(id),
FOREIGN KEY (species_id) REFERENCES species(id),
FOREIGN KEY (filter_id) REFERENCES filter(id)
);
CREATE INDEX idx_label_selection_id ON label(selection_id);
CREATE INDEX idx_label_species_id ON label(species_id);
CREATE TABLE label_subtype (
id VARCHAR(21) PRIMARY KEY, label_id VARCHAR(21) NOT NULL, calltype_id VARCHAR(12) NOT NULL, filter_id VARCHAR(12),
certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
active BOOLEAN NOT NULL DEFAULT true,
FOREIGN KEY (label_id) REFERENCES label(id),
FOREIGN KEY (calltype_id) REFERENCES call_type(id),
FOREIGN KEY (filter_id) REFERENCES filter(id)
);
CREATE TABLE ebird_taxonomy_v2024 AS
SELECT
id,
species_code,
primary_com_name,
sci_name,
bird_order, family
FROM ebird_taxonomy
WHERE taxonomy_version = '2024';
CREATE INDEX idx_ebird_name_search ON ebird_taxonomy_v2024 USING gin(to_tsvector('english', primary_com_name || ' ' || sci_name));
CREATE INDEX ebird_taxonomy_species_code ON ebird_taxonomy_v2024(species_code);
CREATE TABLE species_dataset (
species_id VARCHAR(12) NOT NULL,
dataset_id VARCHAR(12) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (species_id, dataset_id),
FOREIGN KEY (species_id) REFERENCES species(id),
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
CREATE INDEX idx_species_dataset_species ON species_dataset(species_id);
CREATE INDEX idx_species_dataset_dataset ON species_dataset(dataset_id);