details: error instanceof Error ? error.message : String(error),
},
500
);
}
});
/**
* Get all filters used in a specific dataset
*
* @route GET /api/statistics/dataset-filters
* @authentication Required
* @query {string} datasetId - The dataset ID to analyze
* @returns {Object} Response containing:
* - data: Array of filter objects with id and name
* @description Returns all filters that have been used for labels in the specified dataset.
* Requires READ permission on the dataset.
*/
statistics.get("/dataset-filters", authenticate, async (c) => {
try {
const jwtPayload = (c as unknown as { jwtPayload: JWTPayload }).jwtPayload;
const userId = jwtPayload.sub;
// Get query parameters
const datasetId = c.req.query("datasetId");
// Validate required parameters
if (!datasetId) {
return c.json({
error: "Missing required query parameter: datasetId"
}, 400);
}
const db = createDatabase(c.env);
// Check if user has READ permission for this dataset
const hasPermission = await checkUserPermission(db, userId, datasetId, 'READ');
if (!hasPermission) {
return c.json({
error: "Access denied: No READ permission for this dataset"
}, 403);
}
// Get all unique filters used in this dataset
const filtersResult = await db.execute(sql`
SELECT DISTINCT f.id, f.name
FROM filter f
JOIN label l ON f.id = l.filter_id
JOIN selection s ON l.selection_id = s.id
JOIN file fl ON s.file_id = fl.id
JOIN cluster c ON fl.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND l.active = true
AND s.active = true
AND fl.active = true
AND c.active = true
AND f.active = true
ORDER BY f.name
`);
// Transform the result
const filterOptions = filtersResult.rows.map(row => ({
id: row.id as string,
name: row.name as string
}));
return c.json({
data: filterOptions
});
} catch (error) {
console.error("Error fetching dataset filters:", error);
return c.json(
{
error: "Failed to fetch dataset filters",
details: error instanceof Error ? error.message : String(error),
},
500
);
}
});
/**
* Calculate call rate per hour for a specific species and filter in a dataset
*
* @route GET /api/statistics/dataset-call-rate-per-hour
* @authentication Required
* @query {string} datasetId - The dataset ID to analyze
* @query {string} speciesId - The species ID to filter for
* @query {string} filterId - The filter ID to use for call detection
* @query {string} timeFilter - Time filter (civil_night, solar_night, civil_day, solar_day, all) - defaults to 'civil_night'
* @query {string} callTypeId - Optional call type ID to filter for specific call types
* @returns {Object} Response containing:
* - data: Array of hourly call rate statistics
* - hour_of_day: Hour (0-23)
* - call_count: Number of calls detected in that hour
* - total_recording_hours: Total recording time in that hour
* - call_rate_per_hour: Calls per recording hour
* @description Calculates the call rate per hour for a specific species in a dataset.
* Requires READ permission on the dataset.
*/
statistics.get("/dataset-call-rate-per-hour", authenticate, async (c) => {
try {
const jwtPayload = (c as unknown as { jwtPayload: JWTPayload }).jwtPayload;
const userId = jwtPayload.sub;
// Get query parameters
const datasetId = c.req.query("datasetId");
const speciesId = c.req.query("speciesId");
const filterId = c.req.query("filterId");
const timeFilter = c.req.query("timeFilter") || "civil_night";
const callTypeId = c.req.query("callTypeId"); // Optional parameter
// Validate required parameters
if (!datasetId) {
return c.json({
error: "Missing required query parameter: datasetId"
}, 400);
}
if (!speciesId) {
return c.json({
error: "Missing required query parameter: speciesId"
}, 400);
}
if (!filterId) {
return c.json({
error: "Missing required query parameter: filterId"
}, 400);
}
// Validate timeFilter
const validTimeFilters: TimeFilter[] = ["civil_night", "civil_day", "solar_night", "solar_day", "all"];
if (!validTimeFilters.includes(timeFilter as TimeFilter)) {
return c.json({
error: `Invalid timeFilter. Must be one of: ${validTimeFilters.join(", ")}`
}, 400);
}
const db = createDatabase(c.env);
// Check if user has read permission for this dataset
const hasPermission = await checkUserPermission(db, userId, datasetId, 'READ');
if (!hasPermission) {
return c.json({
error: "Access denied: No READ permission for this dataset"
}, 403);
}
// Build call type filter condition if callTypeId is provided
const callTypeFilter = callTypeId ? sql`
AND EXISTS (
SELECT 1 FROM label_subtype ls
WHERE ls.label_id = l.id
AND ls.calltype_id = ${callTypeId}
AND ls.active = true
)
` : sql``;
// Execute the call rate calculation query
let result;
if (timeFilter === "civil_night") {
result = await db.execute(sql`
WITH call_counts_per_hour AS (
SELECT
EXTRACT(HOUR FROM f.timestamp_local) as hour_of_day,
COUNT(l.id) as call_count,
COUNT(DISTINCT f.id) as files_with_calls
FROM selection s
JOIN label l ON s.id = l.selection_id
JOIN file f ON s.file_id = f.id
JOIN cluster c ON f.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND l.species_id = ${speciesId}
AND l.filter_id = ${filterId}
AND s.is_civil_night = true
AND l.active = true
AND s.active = true
AND f.active = true
AND c.active = true
${callTypeFilter}
GROUP BY EXTRACT(HOUR FROM f.timestamp_local)
),
all_files_per_hour AS (
SELECT
EXTRACT(HOUR FROM f.timestamp_local) as hour_of_day,
COUNT(f.id) as total_files,
SUM(f.duration) / 3600.0 as total_recording_hours
FROM file f
JOIN cluster c ON f.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND f.maybe_civil_night = true
AND f.active = true
AND c.active = true
GROUP BY EXTRACT(HOUR FROM f.timestamp_local)
)
SELECT
afph.hour_of_day,
COALESCE(ccph.call_count, 0) as call_count,
ROUND(afph.total_recording_hours::numeric, 2) as total_recording_hours,
CASE
WHEN afph.total_recording_hours > 0 THEN
ROUND((COALESCE(ccph.call_count, 0) / afph.total_recording_hours)::numeric, 2)
ELSE 0
END as call_rate_per_hour
FROM all_files_per_hour afph
LEFT JOIN call_counts_per_hour ccph ON afph.hour_of_day = ccph.hour_of_day
ORDER BY afph.hour_of_day
`);
} else if (timeFilter === "civil_day") {
result = await db.execute(sql`
WITH call_counts_per_hour AS (
SELECT
EXTRACT(HOUR FROM f.timestamp_local) as hour_of_day,
COUNT(l.id) as call_count,
COUNT(DISTINCT f.id) as files_with_calls
FROM selection s
JOIN label l ON s.id = l.selection_id
JOIN file f ON s.file_id = f.id
JOIN cluster c ON f.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND l.species_id = ${speciesId}
AND l.filter_id = ${filterId}
AND s.is_civil_night = false
AND l.active = true
AND s.active = true
AND f.active = true
AND c.active = true
${callTypeFilter}
GROUP BY EXTRACT(HOUR FROM f.timestamp_local)
),
all_files_per_hour AS (
SELECT
EXTRACT(HOUR FROM f.timestamp_local) as hour_of_day,
COUNT(f.id) as total_files,
SUM(f.duration) / 3600.0 as total_recording_hours
FROM file f
JOIN cluster c ON f.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND f.maybe_civil_night = false
AND f.active = true
AND c.active = true
GROUP BY EXTRACT(HOUR FROM f.timestamp_local)
)
SELECT
afph.hour_of_day,
COALESCE(ccph.call_count, 0) as call_count,
ROUND(afph.total_recording_hours::numeric, 2) as total_recording_hours,
CASE
WHEN afph.total_recording_hours > 0 THEN
ROUND((COALESCE(ccph.call_count, 0) / afph.total_recording_hours)::numeric, 2)
ELSE 0
END as call_rate_per_hour
FROM all_files_per_hour afph
LEFT JOIN call_counts_per_hour ccph ON afph.hour_of_day = ccph.hour_of_day
ORDER BY afph.hour_of_day
`);
} else if (timeFilter === "solar_night") {
result = await db.execute(sql`
WITH call_counts_per_hour AS (
SELECT
EXTRACT(HOUR FROM f.timestamp_local) as hour_of_day,
COUNT(l.id) as call_count,
COUNT(DISTINCT f.id) as files_with_calls
FROM selection s
JOIN label l ON s.id = l.selection_id
JOIN file f ON s.file_id = f.id
JOIN cluster c ON f.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND l.species_id = ${speciesId}
AND l.filter_id = ${filterId}
AND s.is_solar_night = true
AND l.active = true
AND s.active = true
AND f.active = true
AND c.active = true
${callTypeFilter}
GROUP BY EXTRACT(HOUR FROM f.timestamp_local)
),
all_files_per_hour AS (
SELECT
EXTRACT(HOUR FROM f.timestamp_local) as hour_of_day,
COUNT(f.id) as total_files,
SUM(f.duration) / 3600.0 as total_recording_hours
FROM file f
JOIN cluster c ON f.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND f.maybe_solar_night = true
AND f.active = true
AND c.active = true
GROUP BY EXTRACT(HOUR FROM f.timestamp_local)
)
SELECT
afph.hour_of_day,
COALESCE(ccph.call_count, 0) as call_count,
ROUND(afph.total_recording_hours::numeric, 2) as total_recording_hours,
CASE
WHEN afph.total_recording_hours > 0 THEN
ROUND((COALESCE(ccph.call_count, 0) / afph.total_recording_hours)::numeric, 2)
ELSE 0
END as call_rate_per_hour
FROM all_files_per_hour afph
LEFT JOIN call_counts_per_hour ccph ON afph.hour_of_day = ccph.hour_of_day
ORDER BY afph.hour_of_day
`);
} else if (timeFilter === "solar_day") {
result = await db.execute(sql`
WITH call_counts_per_hour AS (
SELECT
EXTRACT(HOUR FROM f.timestamp_local) as hour_of_day,
COUNT(l.id) as call_count,
COUNT(DISTINCT f.id) as files_with_calls
FROM selection s
JOIN label l ON s.id = l.selection_id
JOIN file f ON s.file_id = f.id
JOIN cluster c ON f.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND l.species_id = ${speciesId}
AND l.filter_id = ${filterId}
AND s.is_solar_night = false
AND l.active = true
AND s.active = true
AND f.active = true
AND c.active = true
${callTypeFilter}
GROUP BY EXTRACT(HOUR FROM f.timestamp_local)
),
all_files_per_hour AS (
SELECT
EXTRACT(HOUR FROM f.timestamp_local) as hour_of_day,
COUNT(f.id) as total_files,
SUM(f.duration) / 3600.0 as total_recording_hours
FROM file f
JOIN cluster c ON f.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND f.maybe_solar_night = false
AND f.active = true
AND c.active = true
GROUP BY EXTRACT(HOUR FROM f.timestamp_local)
)
SELECT
afph.hour_of_day,
COALESCE(ccph.call_count, 0) as call_count,
ROUND(afph.total_recording_hours::numeric, 2) as total_recording_hours,
CASE
WHEN afph.total_recording_hours > 0 THEN
ROUND((COALESCE(ccph.call_count, 0) / afph.total_recording_hours)::numeric, 2)
ELSE 0
END as call_rate_per_hour
FROM all_files_per_hour afph
LEFT JOIN call_counts_per_hour ccph ON afph.hour_of_day = ccph.hour_of_day
ORDER BY afph.hour_of_day
`);
} else {
// timeFilter === "all"
result = await db.execute(sql`
WITH call_counts_per_hour AS (
SELECT
EXTRACT(HOUR FROM f.timestamp_local) as hour_of_day,
COUNT(l.id) as call_count,
COUNT(DISTINCT f.id) as files_with_calls
FROM selection s
JOIN label l ON s.id = l.selection_id
JOIN file f ON s.file_id = f.id
JOIN cluster c ON f.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND l.species_id = ${speciesId}
AND l.filter_id = ${filterId}
AND l.active = true
AND s.active = true
AND f.active = true
AND c.active = true
${callTypeFilter}
GROUP BY EXTRACT(HOUR FROM f.timestamp_local)
),
all_files_per_hour AS (
SELECT
EXTRACT(HOUR FROM f.timestamp_local) as hour_of_day,
COUNT(f.id) as total_files,
SUM(f.duration) / 3600.0 as total_recording_hours
FROM file f
JOIN cluster c ON f.cluster_id = c.id
WHERE c.dataset_id = ${datasetId}
AND f.active = true
AND c.active = true
GROUP BY EXTRACT(HOUR FROM f.timestamp_local)
)
SELECT
afph.hour_of_day,
COALESCE(ccph.call_count, 0) as call_count,
ROUND(afph.total_recording_hours::numeric, 2) as total_recording_hours,
CASE
WHEN afph.total_recording_hours > 0 THEN
ROUND((COALESCE(ccph.call_count, 0) / afph.total_recording_hours)::numeric, 2)
ELSE 0
END as call_rate_per_hour
FROM all_files_per_hour afph
LEFT JOIN call_counts_per_hour ccph ON afph.hour_of_day = ccph.hour_of_day
ORDER BY afph.hour_of_day
`);
}
// Transform the result to match our expected format
const callRateData: CallRatePerHour[] = result.rows.map(row => ({
hour_of_day: Number(row.hour_of_day),
call_count: Number(row.call_count),
total_recording_hours: Number(row.total_recording_hours),
call_rate_per_hour: Number(row.call_rate_per_hour)
}));
const metadata: CallRateMetadata = {
datasetId,
speciesId,
filterId,
timeFilter,
totalHours: callRateData.length,
totalCalls: callRateData.reduce((sum, hour) => sum + hour.call_count, 0),
totalRecordingHours: callRateData.reduce((sum, hour) => sum + hour.total_recording_hours, 0)
};
return c.json({
data: callRateData,
metadata
});
} catch (error) {
console.error("Error calculating dataset call rate per hour:", error);
return c.json(
{
error: "Failed to calculate dataset call rate per hour",