7FCKFQUZIWQR7GYO7KNQDTZBM72PAE7EQBHSWM2J3MXBGDUVWQZAC
QVOFQKGCNHVCILE56RPBRLDIZGIUUBVMCV4CJLPH5R6Z7KNPJI6QC
4A64WM7PDB5LRDEIRPMKE3SFOULYEHTMMJ3PC4NR2N34ZKRY4XYQC
36IKBCJKMV6NPBP2TYPZFE7IY22K33VTKETTFYLB7AN2Y7TI6ZAQC
2SXAHYTJY4S3NBJVNGQZQOYACHU5MEPMV2HH3BGW4DTF42AQSBLAC
6WNXYJBM63UZH5WW6S73IAKVJBMMUMITJHEHA5OZQXVRBLWUXSWQC
HVD2NGYM4J2PKQ72SFMXLBGQPKPCGDRB54IK3ISOQZDWKGM3WGQQC
XTU6PGJEQEQ4WLCZDUQPDBB3H3JST4NO7QBPYJ3ZMKTZ4QH3U3OAC
QTSC7SK437F5SFMYSP74I6WRWU2KSKYQJNPCPSSXWRMIFZZPQ6WQC
M3JUJ2WWZGCVMBITKRM5FUJMHFYL2QRMXJUVRUE4AC2RF74AOL5AC
YX7LU4WRAUDMWS3DEDXZDSF6DXBHLYDWVSMSRK6KIW3MO6GRXSVQC
2WKGHT2TVFMQT7VUL5OCYVNE365QOYNRXA34LOZ4DBFJ2ZVB4XQQC
OSNBT6AANZB3TF7HAJ35N3Z2EGDU5VQ4LGQORKMA25ACMNV35CQQC
ZYT3JRERMYXLMJHLPZYQHAINVMPQLBKGGN7A4C7OTVZDY42ZLTKQC
4M3EBLTLSS2BRCM42ZP7WVD4YMRRLGV2P2XF47IAV5XHHJD52HTQC
ROQGXQWL2V363K3W7TVVYKIAX4N4IWRERN5BJ7NYJRRVB6OMIJ4QC
J2RLNDEXTGAV4BB6ANIIR7XJLJBHSB4NFQWSBWHNAFB6DMLGS5RAC
RLH37YB4D7O42IFM2T7GJG4AVVAURWBZ7AOTHAWR7YJZRG3JOPLQC
PVQBFR72OCQGYF2G2KDWNKBHWJ24N6D653X6KARBGUSYBIHIXPRQC
POIBWSL3JFHT2KN3STFSJX3INSYKEJTX6KSW3N7BVEKWX2GJ6T7QC
XEXJLBOH6HQAUZRUNH3CCPNUD4HRNCKMRZ5UJ6UUCO76KV6WUJAAC
LYPSC7BOH6T45FCPRHSCXILAJSJ74D5WSQTUIKPWD5ECXOYGUY5AC
4RBE543WLHA7PIYT4W7YEJPF6XKZ2UGKPJBQ3CTLJ44AOMGHCEYQC
7ESBJZLIH3TAERLH2HIZAAQHVNVHQWLWCOBLMJRFTIL33YITJNIAC
HM75N4NTZ4BBSSDC7TUSYOQ4SIF3G6KPZA5QRYCVCVRSKQVTJAXAC
JHFIJJSLVMQNYIDE6CXWUK5UTB7BTUSY7NCI33WKCWIRZHCSMBHQC
OWHNUYOKSGQTG6ZSVQZ3DNMYCW3IOEMXGRRS5QRRDAPIG4MOODNAC
U4CVCPSGPGYWJ4PA72HHHCHKJSQW3GU2QFK4YFSMKQOEM2MMY3PQC
UCDTBEK3CF6YT2H6V57HI6FAFW44BIYYAK3Z2QJ5LJE7QWX7OEYAC
* @param {string} datasetId - Required query parameter specifying the dataset to fetch locations from
* @param {number} [page=1] - Optional page number for pagination (starts at 1)
* @param {number} [pageSize=100] - Optional page size (1-100, defaults to 100)
* @body {Object} Dataset creation payload:
* - id: string (nanoid(12) - user generated)
* - name: string (required, max 255 chars)
* - description?: string (optional, max 255 chars)
* - public?: boolean (optional, defaults to false)
* - type?: string (optional, defaults to 'organise')
* - data: Array of location objects with id, name, latitude, longitude, description
* - pagination: Object with pagination metadata (currentPage, pageSize, totalPages, totalItems, etc.)
* @error 400 - If datasetId is missing or page is invalid
* @description Returns active locations for the specified dataset with pagination support
* - data: The created dataset object
* @error 400 - If required fields are missing or invalid
* @error 500 - If database operation fails
* @description Creates a new dataset for the authenticated user
* The user becomes the owner, creator, and modifier of the dataset
const userId = jwtPayload.sub; // User ID from JWT // Subject claim usually contains the user ID
const userId = jwtPayload.sub; // User ID from JWT
// Connect to the database first to check permissions
const sql = neon(c.env.DATABASE_URL);
const db = drizzle(sql);
// Get the dataset ID from query parameter
const datasetId = c.req.query("datasetId");
const page = parseInt(c.req.query("page") || "1", 10);
const pageSize = parseInt(c.req.query("pageSize") || "100", 10);
// Check if user has permission to create datasets (ADMIN or CURATOR roles)
const userRoleResult = await db
.select({ role: userRole.role })
.from(userRole)
.where(eq(userRole.userId, userId))
.limit(1);
// Limit page size between 1 and 100
const limitedPageSize = Math.min(Math.max(pageSize, 1), 100);
const offset = (page - 1) * limitedPageSize;
// Connect to the database
const sql = neon(c.env.DATABASE_URL);
const db = drizzle(sql);
// Predicate shared by the count query and the page query:
// rows that belong to the requested dataset and are still active.
const activeInDataset = () =>
  sqlExpr`${location.datasetId} = ${datasetId} AND ${location.active} = true`;

// Count the matching rows first so pagination metadata can be derived
console.log("Counting locations for datasetId:", datasetId);
const countResult = await db
  .select({ count: sqlExpr<number>`COUNT(*)` })
  .from(location)
  .where(activeInDataset());

const totalLocations = Number(countResult[0].count);
const totalPages = Math.ceil(totalLocations / limitedPageSize);

// Fetch one page of locations, ordered by name
console.log("Querying locations for datasetId:", datasetId, "page:", page);
const locationColumns = {
  id: location.id,
  name: location.name,
  latitude: location.latitude,
  longitude: location.longitude,
  description: location.description,
};
const results = await db
  .select(locationColumns)
  .from(location)
  .where(activeInDataset())
  .orderBy(location.name)
  .limit(limitedPageSize)
  .offset(offset);

console.log("Found", results.length, "locations for dataset", datasetId, "page:", page);

// Respond with the page plus the metadata a client needs to navigate
return c.json({
  data: results,
  pagination: {
    currentPage: page,
    pageSize: limitedPageSize,
    totalPages,
    totalItems: totalLocations,
    hasNextPage: page < totalPages,
    hasPreviousPage: page > 1,
  },
});
} catch (error) {
console.error("Error fetching locations:", error);
return c.json(
{
error: "Failed to fetch locations",
details: error instanceof Error ? error.message : String(error),
},
500
);
}
});
/**
* Protected API route to fetch clusters for a specific location
*
* @route GET /api/clusters
* @authentication Required
* @param {string} locationId - Required query parameter specifying the location to fetch clusters from
* @returns {Object} Response containing:
* - data: Array of cluster objects with recording pattern information
* @error 400 - If locationId is missing
* @description Returns clusters for the specified location along with their recording patterns
* Performs a LEFT JOIN with the cyclicRecordingPattern table to include recording duration data
* Results are returned with recording pattern information embedded in each cluster object
*/
app.get("/api/clusters", authenticate, async (c) => {
try {
// Get the JWT payload (user info)
const jwtPayload = (c as unknown as { jwtPayload: JWTPayload }).jwtPayload;
const userId = jwtPayload.sub; // User ID from JWT // Subject claim usually contains the user ID
// Get the location ID from query parameter
const locationId = c.req.query("locationId");
console.log("Cluster API called with locationId:", locationId, "userId:", userId);
if (!locationId) {
console.log("Missing locationId in request");
if (!name || typeof name !== 'string' || name.trim().length === 0) {
// Open a database handle for this request
const sql = neon(c.env.DATABASE_URL);
const db = drizzle(sql);

// One query fetches clusters together with their (optional) recording
// pattern; the LEFT JOIN keeps clusters that have no pattern assigned.
console.log("Querying clusters for locationId:", locationId);
const patternJoin = eq(cluster.cyclicRecordingPatternId, cyclicRecordingPattern.id);
const joinedResults = await db
  .select({
    // Columns taken from the cluster table
    id: cluster.id,
    datasetId: cluster.datasetId,
    locationId: cluster.locationId,
    name: cluster.name,
    description: cluster.description,
    createdBy: cluster.createdBy,
    createdAt: cluster.createdAt,
    lastModified: cluster.lastModified,
    modifiedBy: cluster.modifiedBy,
    active: cluster.active,
    timezoneId: cluster.timezoneId,
    cyclicRecordingPatternId: cluster.cyclicRecordingPatternId,
    sampleRate: cluster.sampleRate,
    // Columns taken from the joined recording pattern (null when absent)
    recordS: cyclicRecordingPattern.recordS,
    sleepS: cyclicRecordingPattern.sleepS,
  })
  .from(cluster)
  .leftJoin(cyclicRecordingPattern, patternJoin)
  .where(eq(cluster.locationId, locationId))
  .orderBy(cluster.name);

console.log("Found", joinedResults.length, "clusters for location", locationId);

// Fold the two flat pattern columns into a nested `recordingPattern` object;
// it is null unless both durations are present.
const enrichedResults = joinedResults.map(({ recordS, sleepS, ...clusterFields }) => ({
  ...clusterFields,
  recordingPattern: recordS === null || sleepS === null ? null : { recordS, sleepS },
}));

return c.json({ data: enrichedResults });
} catch (error) {
console.error("Error fetching clusters:", error);
return c.json(
{
error: "Failed to fetch clusters",
details: error instanceof Error ? error.message : String(error),
},
500
);
}
});
/**
* Protected API route to fetch audio files for a specific cluster
*
* @route GET /api/files
* @authentication Required
* @param {string} clusterId - Required query parameter specifying the cluster to fetch files from
* @param {number} [page=1] - Optional page number for pagination (starts at 1)
* @param {number} [pageSize=100] - Optional page size (10-500, defaults to 100)
* @param {string} [solarNight] - Optional filter for files recorded during solar night ('true'/'false')
* @param {string} [civilNight] - Optional filter for files recorded during civil night ('true'/'false')
* @param {string} [speciesId] - Optional filter for files with selections labeled with specific species
* @returns {Object} Response containing:
* - data: Array of file objects with metadata, mothMetadata, and species information
* - pagination: Object with pagination metadata
* - filters: Object showing the filters that were applied
* @error 400 - If clusterId is missing or page is invalid
* @description Returns audio files for the specified cluster with comprehensive metadata:
* - Basic file information (name, path, timestamp, duration, etc.)
* - File metadata (JSON format)
* - Recording device metadata (gain, battery voltage, temperature)
* - Species found in each file
*
* When speciesId is provided, only returns files that have at least one
* selection labeled with the specified species.
*/
app.get("/api/files", authenticate, async (c) => {
try {
// Get query parameters
const clusterId = c.req.query("clusterId");
const page = parseInt(c.req.query("page") || "1", 10);
const pageSize = parseInt(c.req.query("pageSize") || "100", 10);
const solarNight = c.req.query("solarNight");
const civilNight = c.req.query("civilNight");
const speciesId = c.req.query("speciesId"); // Optional filter for species
// Validate parameters
if (!clusterId) {
console.log("Missing clusterId in request");
// Validate field lengths
if (id.length !== 12) {
// Validate and limit page size to prevent excessive queries
const limitedPageSize = Math.min(Math.max(pageSize, 10), 500);
const offset = (page - 1) * limitedPageSize;
// Validate page number
if (page < 1) {
console.log("Invalid page number in request:", page);
if (name.length > 255) {
}
// Connect to the database
const sql = neon(c.env.DATABASE_URL);
const db = drizzle(sql);
// Build filter conditions
let whereConditions = sqlExpr`${file.clusterId} = ${clusterId} AND ${file.active} = true`;
// Add filters for solarNight if specified
if (solarNight === 'true') {
whereConditions = sqlExpr`${whereConditions} AND ${file.maybeSolarNight} = true`;
} else if (solarNight === 'false') {
whereConditions = sqlExpr`${whereConditions} AND (${file.maybeSolarNight} = false OR ${file.maybeSolarNight} IS NULL)`;
}
// Add filters for civilNight if specified
if (civilNight === 'true') {
whereConditions = sqlExpr`${whereConditions} AND ${file.maybeCivilNight} = true`;
} else if (civilNight === 'false') {
whereConditions = sqlExpr`${whereConditions} AND (${file.maybeCivilNight} = false OR ${file.maybeCivilNight} IS NULL)`;
// First, get the total count of files for pagination metadata
let countResult;
if (speciesId) {
// Count only files that have at least one selection labeled with the specified species
countResult = await db
.select({
count: sqlExpr<number>`COUNT(DISTINCT ${file.id})`
})
.from(file)
.innerJoin(selection, eq(selection.fileId, file.id))
.innerJoin(label, eq(label.selectionId, selection.id))
.where(sqlExpr`${whereConditions} AND ${label.speciesId} = ${speciesId} AND ${label.active} = true`);
} else {
// Standard count without species filter
countResult = await db
.select({
count: sqlExpr<number>`COUNT(1)`
})
.from(file)
.where(whereConditions);
}
const totalFiles = Number(countResult[0].count);
const totalPages = Math.ceil(totalFiles / limitedPageSize);
// Query files for the specified cluster with pagination
let filesResult;
if (speciesId) {
// Get only files that have at least one selection labeled with the specified species
filesResult = await db
.select({
id: file.id,
fileName: file.fileName,
path: file.path,
timestampLocal: file.timestampLocal,
duration: file.duration,
sampleRate: file.sampleRate,
locationId: file.locationId,
description: file.description,
maybeSolarNight: file.maybeSolarNight,
maybeCivilNight: file.maybeCivilNight,
moonPhase: file.moonPhase,
})
.from(file)
.innerJoin(selection, eq(selection.fileId, file.id))
.innerJoin(label, eq(label.selectionId, selection.id))
.where(sqlExpr`${whereConditions} AND ${label.speciesId} = ${speciesId} AND ${label.active} = true`)
.orderBy(file.timestampLocal)
.groupBy(file.id, file.fileName, file.path, file.timestampLocal, file.duration,
file.sampleRate, file.locationId, file.description, file.maybeSolarNight,
file.maybeCivilNight, file.moonPhase)
.limit(limitedPageSize)
.offset(offset);
} else {
// Standard query without species filter
filesResult = await db
.select({
id: file.id,
fileName: file.fileName,
path: file.path,
timestampLocal: file.timestampLocal,
duration: file.duration,
sampleRate: file.sampleRate,
locationId: file.locationId,
description: file.description,
maybeSolarNight: file.maybeSolarNight,
maybeCivilNight: file.maybeCivilNight,
moonPhase: file.moonPhase,
})
.from(file)
.where(whereConditions)
.orderBy(file.timestampLocal)
.limit(limitedPageSize)
.offset(offset);
if (description && description.length > 255) {
return c.json({
error: "Field 'description' must be 255 characters or less"
}, 400);
// Early return if no files found
if (filesResult.length === 0) {
// Validate type if provided
const validTypes = ['organise', 'test', 'train'];
const datasetType = type || 'organise';
if (!validTypes.includes(datasetType)) {
data: [],
pagination: {
currentPage: page,
pageSize: limitedPageSize,
totalPages: totalPages,
totalItems: totalFiles,
hasNextPage: page < totalPages,
hasPreviousPage: page > 1,
},
filters: {
solarNight: solarNight === 'true' ? true : solarNight === 'false' ? false : null,
civilNight: civilNight === 'true' ? true : civilNight === 'false' ? false : null,
speciesId: speciesId || null,
}
});
error: `Field 'type' must be one of: ${validTypes.join(', ')}`
}, 400);
// Get all file IDs for metadata queries
const fileIds = filesResult.map(f => f.id);
// Create the dataset
const now = new Date();
const newDataset = {
id: id.trim(),
name: name.trim(),
description: description?.trim() || null,
public: Boolean(isPublic),
type: datasetType,
createdBy: userId,
createdAt: now,
lastModified: now,
modifiedBy: userId,
owner: userId,
active: true,
};
// Insert the dataset
const result = await db.insert(dataset).values(newDataset).returning({
id: dataset.id,
name: dataset.name,
description: dataset.description,
public: dataset.public,
type: dataset.type,
createdAt: dataset.createdAt,
owner: dataset.owner,
});
console.log("Created dataset:", result[0].id, "for user:", userId);
return c.json({
data: result[0]
}, 201);
} catch (error) {
console.error("Error creating dataset:", error);
data: [],
pagination: {
currentPage: page,
pageSize: limitedPageSize,
totalPages: totalPages,
totalItems: totalFiles,
hasNextPage: page < totalPages,
hasPreviousPage: page > 1,
},
filters: {
solarNight: solarNight === 'true' ? true : solarNight === 'false' ? false : null,
civilNight: civilNight === 'true' ? true : civilNight === 'false' ? false : null,
speciesId: speciesId || null,
}
});
error: "A dataset with this ID already exists"
}, 400);
// Build the IN (...) list with one bound parameter per file ID.
// The previous version wrapped each ID in single quotes by hand and spliced
// the resulting string into the statement with sqlExpr.raw(), so any ID
// containing a quote would corrupt (or alter) the SQL text.
// NOTE: fileIds is expected to be non-empty here; an empty list would render
// `IN ()`, which is invalid SQL.
const fileIdParams = sqlExpr.join(
  fileIds.map(id => sqlExpr`${id}`),
  sqlExpr`, `
);

// Species rows always require an active label; when the request carries a
// speciesId filter, only that species is returned.
let speciesCondition = sqlExpr`${selection.fileId} IN (${fileIdParams}) AND ${label.active} = true`;
if (speciesId) {
  speciesCondition = sqlExpr`${speciesCondition} AND ${label.speciesId} = ${speciesId}`;
}

// The three lookups are independent of each other, so run them concurrently
const [metadataResults, mothMetadataResults, speciesResults] = await Promise.all([
  // Free-form JSON metadata per file
  db.select({
    fileId: fileMetadata.fileId,
    json: fileMetadata.json
  })
    .from(fileMetadata)
    .where(sqlExpr`${fileMetadata.fileId} IN (${fileIdParams})`),
  // Recording-device readings per file
  db.select({
    fileId: mothMetadata.fileId,
    gain: mothMetadata.gain,
    batteryV: mothMetadata.batteryV,
    tempC: mothMetadata.tempC
  })
    .from(mothMetadata)
    .where(sqlExpr`${mothMetadata.fileId} IN (${fileIdParams})`),
  // Species reached through selection -> label -> species
  db.select({
    fileId: selection.fileId,
    speciesId: species.id,
    speciesLabel: species.label,
    ebirdCode: species.ebirdCode,
    description: species.description
  })
    .from(selection)
    .innerJoin(label, eq(label.selectionId, selection.id))
    .innerJoin(species, eq(species.id, label.speciesId))
    .where(speciesCondition)
]);
// Index file metadata by file ID. String payloads that look like JSON (or
// like JSON with escaped quotes) are decoded; anything that fails to parse
// is kept as the raw value.
const metadataMap: Record<string, unknown> = {};
for (const row of metadataResults) {
  let decoded: unknown = row.json;
  if (typeof row.json === 'string') {
    try {
      if (row.json.startsWith('{') || row.json.startsWith('[')) {
        decoded = JSON.parse(row.json);
      } else if (row.json.includes('\\"')) {
        decoded = JSON.parse(row.json.replace(/\\"/g, '"'));
      }
    } catch (e) {
      console.error("Error processing metadata JSON:", e);
      decoded = row.json;
    }
  }
  metadataMap[row.fileId] = decoded;
}

// Index recording-device readings by file ID, converting the voltage and
// temperature values to numbers while preserving nulls.
const mothMetadataMap: Record<string, {
  gain: string | null;
  batteryV: number | null;
  tempC: number | null;
}> = {};
for (const { fileId, gain, batteryV, tempC } of mothMetadataResults) {
  mothMetadataMap[fileId] = {
    gain,
    batteryV: batteryV === null ? null : Number(batteryV),
    tempC: tempC === null ? null : Number(tempC),
  };
}

// Group species by file ID, listing each species at most once per file
const speciesMap: Record<string, Array<{
  id: string;
  label: string;
  ebirdCode: string | null;
  description: string | null;
}>> = {};
for (const row of speciesResults) {
  let bucket = speciesMap[row.fileId];
  if (!bucket) {
    bucket = [];
    speciesMap[row.fileId] = bucket;
  }
  const alreadyListed = bucket.some(entry => entry.id === row.speciesId);
  if (!alreadyListed) {
    bucket.push({
      id: row.speciesId,
      label: row.speciesLabel,
      ebirdCode: row.ebirdCode,
      description: row.description,
    });
  }
}

// Attach the three lookups to every file row
const files = filesResult.map(fileData => {
  const { id } = fileData;
  return {
    ...fileData,
    metadata: metadataMap[id] || null,
    mothMetadata: mothMetadataMap[id] || null,
    species: speciesMap[id] || [],
  };
});
// Return paginated results with metadata and filters
return c.json({
data: files,
pagination: {
currentPage: page,
pageSize: limitedPageSize,
totalPages: totalPages,
totalItems: totalFiles,
hasNextPage: page < totalPages,
hasPreviousPage: page > 1,
},
filters: {
solarNight: solarNight === 'true' ? true : solarNight === 'false' ? false : null,
civilNight: civilNight === 'true' ? true : civilNight === 'false' ? false : null,
speciesId: speciesId || null,
}
});
} catch (error) {
console.error("Error fetching files:", error);
* @param {string} datasetId - Required query parameter specifying the dataset to fetch species from
* @param {string} id - Dataset ID in URL path
* @body {Object} Dataset update payload:
* - name?: string (optional, max 255 chars)
* - description?: string (optional, max 255 chars)
* - public?: boolean (optional)
* - type?: string (optional)
* - active?: boolean (optional, for soft delete)
* - data: Array of species objects, each with an array of associated call types
* @error 400 - If datasetId is missing
* @description Returns species associated with the specified dataset along with their call types
* Each species object includes:
* - id, label, ebirdCode, description
* - callTypes: Array of call type objects with id and label
*
* Uses an efficient JOIN approach to fetch data in a single query to avoid N+1 query problems
* Results are transformed to provide a nested structure with call types inside species objects
* - data: The updated dataset object
* @error 400 - If fields are invalid or dataset not found
* @error 403 - If user doesn't own the dataset
* @error 500 - If database operation fails
* @description Updates an existing dataset owned by the authenticated user
* Only the dataset owner can modify it
// Get query parameter for datasetId
const datasetId = c.req.query("datasetId");
// Get the JWT payload (user info)
const jwtPayload = (c as unknown as { jwtPayload: JWTPayload }).jwtPayload;
const userId = jwtPayload.sub; // User ID from JWT
// Get dataset ID from URL parameters
const datasetId = c.req.param("id");
// Use JOIN approach to get species and call types in a single query
// This eliminates the N+1 query problem and reduces round trips to the database
const queryResults = await db
// First, check if the dataset exists and if the user owns it
const existingDataset = await db
.from(species)
.innerJoin(
speciesDataset,
eq(species.id, speciesDataset.speciesId)
)
.leftJoin(
callType,
sqlExpr`${callType.speciesId} = ${species.id} AND ${callType.active} = true`
)
.where(eq(speciesDataset.datasetId, datasetId))
.orderBy(species.label);
// If no results are found, return an empty array
if (queryResults.length === 0) {
.from(dataset)
.where(eq(dataset.id, datasetId))
.limit(1);
if (existingDataset.length === 0) {
// Transform the flat results into nested objects
// Using a Map for better performance with large datasets
const speciesMap = new Map();
// Check if user has permission to edit this dataset
const hasEditPermission = await checkUserPermission(db, userId, datasetId, 'EDIT');
queryResults.forEach(row => {
if (!speciesMap.has(row.id)) {
// Create a new species entry if not already in the map
speciesMap.set(row.id, {
id: row.id,
label: row.label,
ebirdCode: row.ebirdCode,
description: row.description,
callTypes: []
});
if (!hasEditPermission) {
return c.json({
error: "You don't have permission to modify this dataset"
}, 403);
}
// Validate fields if provided
if (name !== undefined) {
if (typeof name !== 'string' || name.trim().length === 0) {
return c.json({
error: "Invalid field: name must be a non-empty string"
}, 400);
// Add the call type if it exists and isn't already in the array
if (row.callTypeId) {
const species = speciesMap.get(row.id);
const existingCallType = species.callTypes.find((ct: { id: string }) => ct.id === row.callTypeId);
if (!existingCallType) {
species.callTypes.push({
id: row.callTypeId,
label: row.callTypeLabel
});
}
if (name.length > 255) {
return c.json({
error: "Field 'name' must be 255 characters or less"
}, 400);
});
// Convert map to array
const enrichedSpecies = Array.from(speciesMap.values());
// Return the enriched species data
return c.json({
data: enrichedSpecies
});
} catch (error) {
console.error("Error fetching species:", error);
return c.json(
{
error: "Failed to fetch species",
details: error instanceof Error ? error.message : String(error),
},
500
);
}
});
}
/**
* Protected API route to search eBird taxonomy
*
* @route GET /api/ebird/search
* @authentication Required
* @param {string} q - Search query (common name, scientific name, family, or species code)
* @returns {Object} Response containing:
* - data: Array of matching eBird taxonomy entries
* @description Searches the eBird taxonomy v2024 materialized view for species matching the query.
* Searches across primary_com_name, sci_name, family, and species_code fields.
*/
app.get("/api/ebird/search", authenticate, async (c) => {
try {
const query = c.req.query("q");
if (!query || query.trim().length === 0) {
if (description !== undefined && description !== null && description.length > 255) {
if (query.length < 2) {
return c.json({
error: "Query must be at least 2 characters long"
}, 400);
if (type !== undefined) {
const validTypes = ['organise', 'test', 'train'];
if (!validTypes.includes(type)) {
return c.json({
error: `Field 'type' must be one of: ${validTypes.join(', ')}`
}, 400);
}
// Use raw SQL query for materialized view until Drizzle typing issue is resolved
const results = await db.execute(
sqlExpr`
SELECT
id,
species_code as "speciesCode",
primary_com_name as "primaryComName",
sci_name as "sciName",
bird_order as "birdOrder",
family
FROM ebird_taxonomy_v2024
WHERE
LOWER(primary_com_name) LIKE ${searchTerm} OR
LOWER(sci_name) LIKE ${searchTerm} OR
LOWER(family) LIKE ${searchTerm} OR
LOWER(species_code) LIKE ${searchTerm}
ORDER BY primary_com_name
LIMIT 20
`
);
if (description !== undefined) {
updateData.description = description?.trim() || null;
}
if (isPublic !== undefined) {
updateData.public = Boolean(isPublic);
}
if (type !== undefined) {
updateData.type = type;
}
if (active !== undefined) {
updateData.active = Boolean(active);
}
// Update the dataset
const result = await db
.update(dataset)
.set(updateData)
.where(eq(dataset.id, datasetId))
.returning({
id: dataset.id,
name: dataset.name,
description: dataset.description,
public: dataset.public,
type: dataset.type,
createdAt: dataset.createdAt,
lastModified: dataset.lastModified,
owner: dataset.owner,
active: dataset.active,
});
if (result.length === 0) {
return c.json({
error: "Failed to update dataset"
}, 500);
}
console.log("Updated dataset:", result[0].id, "for user:", userId);
* @param {string} datasetId - Required query parameter specifying the dataset
* @param {string} speciesId - Required query parameter specifying the species to filter by
* @param {string} datasetId - Required query parameter specifying the dataset to fetch locations from
* @param {number} [pageSize=100] - Optional page size (10-500, defaults to 100)
* @param {string} [solarNight] - Optional filter for files recorded during solar night ('true'/'false')
* @param {string} [civilNight] - Optional filter for files recorded during civil night ('true'/'false')
* @param {number} [pageSize=100] - Optional page size (1-100, defaults to 100)
* - data: Array of file objects with metadata, mothMetadata, and species information
* - pagination: Object with pagination metadata
* - filters: Object showing the filters that were applied
* @error 400 - If datasetId or speciesId is missing or page is invalid
* @description Returns files that contain selections labeled with the specified species
* Designed for cross-cluster searches within a dataset
*
* Each file object includes:
* - Basic file information (name, path, timestamp, duration, etc.)
* - File metadata (JSON format)
* - Recording device metadata (gain, battery voltage, temperature)
* - Species information
*
* Uses efficient query optimization with:
* - Parallel Promise.all for metadata queries
* - Proper SQL JOINs to avoid N+1 query problems
* - Data transformation for optimal client-side consumption
* - data: Array of location objects with id, name, latitude, longitude, description
* - pagination: Object with pagination metadata (currentPage, pageSize, totalPages, totalItems, etc.)
* @error 400 - If datasetId is missing or page is invalid
* @description Returns active locations for the specified dataset with pagination support
// Get query parameters
// Get the JWT payload (user info)
const jwtPayload = (c as unknown as { jwtPayload: JWTPayload }).jwtPayload;
const userId = jwtPayload.sub; // User ID from JWT // Subject claim usually contains the user ID
// Get the dataset ID from query parameter
// Build base filter condition - active files with selections of the specified species
// This forms the core of our WHERE clause for both queries
let baseCondition = sqlExpr`
${file.active} = true
AND ${location.datasetId} = ${datasetId}
AND ${label.speciesId} = ${speciesId}
AND ${label.active} = true
`;
// Add filters for day/night if specified
if (solarNight === 'true') {
baseCondition = sqlExpr`${baseCondition} AND ${file.maybeSolarNight} = true`;
} else if (solarNight === 'false') {
baseCondition = sqlExpr`${baseCondition} AND (${file.maybeSolarNight} = false OR ${file.maybeSolarNight} IS NULL)`;
}
if (civilNight === 'true') {
baseCondition = sqlExpr`${baseCondition} AND ${file.maybeCivilNight} = true`;
} else if (civilNight === 'false') {
baseCondition = sqlExpr`${baseCondition} AND (${file.maybeCivilNight} = false OR ${file.maybeCivilNight} IS NULL)`;
}
// Get total count for pagination using a more efficient COUNT(1)
// First, get total count for pagination
console.log("Counting locations for datasetId:", datasetId);
.from(file)
.innerJoin(cluster, eq(file.clusterId, cluster.id))
.innerJoin(location, eq(cluster.locationId, location.id))
.innerJoin(selection, eq(selection.fileId, file.id))
.innerJoin(label, eq(label.selectionId, selection.id))
.where(baseCondition);
.from(location)
.where(sqlExpr`${location.datasetId} = ${datasetId} AND ${location.active} = true`);
// Early return if there are no matching files
if (totalFiles === 0) {
return c.json({
data: [],
pagination: {
currentPage: page,
pageSize: limitedPageSize,
totalPages: 0,
totalItems: 0,
hasNextPage: false,
hasPreviousPage: false,
},
filters: {
datasetId,
speciesId,
solarNight: solarNight === 'true' ? true : solarNight === 'false' ? false : null,
civilNight: civilNight === 'true' ? true : civilNight === 'false' ? false : null,
}
});
}
// Fetch files with pagination
const filesResult = await db
.select({
id: file.id,
fileName: file.fileName,
path: file.path,
timestampLocal: file.timestampLocal,
duration: file.duration,
sampleRate: file.sampleRate,
locationId: file.locationId,
clusterId: file.clusterId,
description: file.description,
maybeSolarNight: file.maybeSolarNight,
maybeCivilNight: file.maybeCivilNight,
moonPhase: file.moonPhase,
})
.from(file)
.innerJoin(cluster, eq(file.clusterId, cluster.id))
.innerJoin(location, eq(cluster.locationId, location.id))
.innerJoin(selection, eq(selection.fileId, file.id))
.innerJoin(label, eq(label.selectionId, selection.id))
.where(baseCondition)
.orderBy(file.timestampLocal)
.groupBy(file.id, file.fileName, file.path, file.timestampLocal, file.duration,
file.sampleRate, file.locationId, file.clusterId, file.description, file.maybeSolarNight,
file.maybeCivilNight, file.moonPhase)
// Query locations for the specified dataset with pagination (using 100 as default limit)
console.log("Querying locations for datasetId:", datasetId, "page:", page);
const results = await db.select({
id: location.id,
name: location.name,
latitude: location.latitude,
longitude: location.longitude,
description: location.description,
}).from(location)
.where(sqlExpr`${location.datasetId} = ${datasetId} AND ${location.active} = true`)
.orderBy(location.name)
// Safety check - shouldn't happen with our early return, but just in case
if (fileIds.length === 0) {
return c.json({
data: [],
pagination: {
currentPage: page,
pageSize: limitedPageSize,
totalPages: totalPages,
totalItems: totalFiles,
hasNextPage: page < totalPages,
hasPreviousPage: page > 1,
},
filters: {
datasetId,
speciesId,
solarNight: solarNight === 'true' ? true : solarNight === 'false' ? false : null,
civilNight: civilNight === 'true' ? true : civilNight === 'false' ? false : null,
}
});
}
// Build the IN (...) list with one bound parameter per file ID.
// The previous version wrapped each ID in single quotes by hand and spliced
// the resulting string into the statement with sqlExpr.raw(), so any ID
// containing a quote would corrupt (or alter) the SQL text.
// NOTE: fileIds is expected to be non-empty here; an empty list would render
// `IN ()`, which is invalid SQL.
const fileIdParams = sqlExpr.join(
  fileIds.map(id => sqlExpr`${id}`),
  sqlExpr`, `
);

// The three lookups are independent of each other, so run them concurrently
const [metadataResults, mothMetadataResults, speciesResults] = await Promise.all([
  // Free-form JSON metadata per file
  db.select({
    fileId: fileMetadata.fileId,
    json: fileMetadata.json
  })
    .from(fileMetadata)
    .where(sqlExpr`${fileMetadata.fileId} IN (${fileIdParams})`),
  // Recording-device readings per file
  db.select({
    fileId: mothMetadata.fileId,
    gain: mothMetadata.gain,
    batteryV: mothMetadata.batteryV,
    tempC: mothMetadata.tempC
  })
    .from(mothMetadata)
    .where(sqlExpr`${mothMetadata.fileId} IN (${fileIdParams})`),
  // Species rows, restricted to the requested species and to active labels
  db.select({
    fileId: selection.fileId,
    speciesId: species.id,
    speciesLabel: species.label,
    ebirdCode: species.ebirdCode,
    description: species.description
  })
    .from(selection)
    .innerJoin(label, eq(label.selectionId, selection.id))
    .innerJoin(species, eq(species.id, label.speciesId))
    .where(sqlExpr`
      ${selection.fileId} IN (${fileIdParams})
      AND ${label.speciesId} = ${speciesId}
      AND ${label.active} = true
    `)
]);
console.log("Found", results.length, "locations for dataset", datasetId, "page:", page);
// Process metadata results into maps for efficient lookups
const metadataMap = metadataResults.reduce((acc, item) => {
let processedJson = item.json;
try {
if (typeof item.json === 'string' && (item.json.startsWith('{') || item.json.startsWith('['))) {
processedJson = JSON.parse(item.json);
} else if (typeof item.json === 'string' && item.json.includes('\\"')) {
const unescaped = item.json.replace(/\\"/g, '"');
processedJson = JSON.parse(unescaped);
}
} catch (e) {
console.error("Error processing metadata JSON:", e);
processedJson = item.json;
}
acc[item.fileId] = processedJson;
return acc;
}, {} as Record<string, unknown>);
// Process moth metadata
const mothMetadataMap = mothMetadataResults.reduce((acc, item) => {
acc[item.fileId] = {
gain: item.gain,
batteryV: item.batteryV !== null ? Number(item.batteryV) : null,
tempC: item.tempC !== null ? Number(item.tempC) : null
};
return acc;
}, {} as Record<string, {
gain: string | null;
batteryV: number | null;
tempC: number | null;
}>);
// Process species data with Map for better performance
const speciesMap = new Map<string, Array<{
id: string;
label: string;
ebirdCode: string | null;
description: string | null;
}>>();
speciesResults.forEach(item => {
if (!speciesMap.has(item.fileId)) {
speciesMap.set(item.fileId, []);
}
// Get the current species array for this file
const fileSpecies = speciesMap.get(item.fileId)!;
// Check if we already have this species (deduplication)
const existingSpeciesIndex = fileSpecies.findIndex(s => s.id === item.speciesId);
if (existingSpeciesIndex === -1) {
// Add species if it doesn't already exist for this file
fileSpecies.push({
id: item.speciesId,
label: item.speciesLabel,
ebirdCode: item.ebirdCode,
description: item.description
});
}
});
// Combine file data with metadata in a single operation
const files = filesResult.map(fileData => ({
...fileData,
metadata: metadataMap[fileData.id] || null,
mothMetadata: mothMetadataMap[fileData.id] || null,
species: speciesMap.get(fileData.id) || []
}));
// Return paginated results with metadata and filters
/**
 * Protected API route to create a new dataset
 *
 * @route POST /api/datasets
 * @authentication Required
 * @body {Object} Dataset creation payload:
 *   - id: string (nanoid(12) - user generated)
 *   - name: string (required, max 255 chars)
 *   - description?: string (optional, max 255 chars)
 *   - public?: boolean (optional, defaults to false)
 *   - type?: string (optional, defaults to 'organise')
 * @returns {Object} Response containing:
 *   - data: The created dataset object
 * @error 400 - If the body is not a JSON object, required fields are missing or invalid,
 *              or a dataset with the same ID already exists
 * @error 403 - If the user's role is neither ADMIN nor CURATOR
 * @error 500 - If database operation fails
 * @description Creates a new dataset for the authenticated user
 * The user becomes the owner, creator, and modifier of the dataset
 */
app.post("/api/datasets", authenticate, async (c) => {
  try {
    // Get the JWT payload (user info)
    const jwtPayload = (c as unknown as { jwtPayload: JWTPayload }).jwtPayload;
    const userId = jwtPayload.sub; // User ID from JWT

    // Connect to the database first to check permissions
    const sql = neon(c.env.DATABASE_URL);
    const db = drizzle(sql);

    // Check if user has permission to create datasets (ADMIN or CURATOR roles)
    const userRoleResult = await db
      .select({ role: userRole.role })
      .from(userRole)
      .where(eq(userRole.userId, userId))
      .limit(1);
    // Users without a role row are treated as plain 'USER'
    const userRoleName = userRoleResult.length > 0 ? userRoleResult[0].role : 'USER';
    if (userRoleName !== 'ADMIN' && userRoleName !== 'CURATOR') {
      return c.json({
        error: "You don't have permission to create datasets"
      }, 403);
    }

    // Parse request body; a malformed or non-object body is a client error, not a 500
    const body = await c.req.json().catch(() => null);
    if (body === null || typeof body !== 'object') {
      return c.json({
        error: "Invalid JSON body"
      }, 400);
    }
    const { id, name, description, public: isPublic, type } = body;

    // Validate required fields
    if (!id || typeof id !== 'string') {
      return c.json({
        error: "Missing or invalid required field: id"
      }, 400);
    }
    if (!name || typeof name !== 'string' || name.trim().length === 0) {
      return c.json({
        error: "Missing or invalid required field: name"
      }, 400);
    }
    if (description !== undefined && description !== null && typeof description !== 'string') {
      return c.json({
        error: "Field 'description' must be a string"
      }, 400);
    }

    // Validate lengths on the trimmed values, because those are what gets stored
    const trimmedId = id.trim();
    const trimmedName = name.trim();
    const trimmedDescription = typeof description === 'string' ? description.trim() : '';
    if (trimmedId.length !== 12) {
      return c.json({
        error: "Field 'id' must be exactly 12 characters (nanoid)"
      }, 400);
    }
    if (trimmedName.length > 255) {
      return c.json({
        error: "Field 'name' must be 255 characters or less"
      }, 400);
    }
    if (trimmedDescription.length > 255) {
      return c.json({
        error: "Field 'description' must be 255 characters or less"
      }, 400);
    }

    // Validate type if provided
    const validTypes = ['organise', 'test', 'train'];
    const datasetType = type || 'organise';
    if (!validTypes.includes(datasetType)) {
      return c.json({
        error: `Field 'type' must be one of: ${validTypes.join(', ')}`
      }, 400);
    }

    // Create the dataset; the caller becomes creator, modifier and owner
    const now = new Date();
    const newDataset = {
      id: trimmedId,
      name: trimmedName,
      description: trimmedDescription || null, // empty description is stored as null
      public: Boolean(isPublic),
      type: datasetType,
      createdBy: userId,
      createdAt: now,
      lastModified: now,
      modifiedBy: userId,
      owner: userId,
      active: true,
    };

    // Insert the dataset
    const result = await db.insert(dataset).values(newDataset).returning({
      id: dataset.id,
      name: dataset.name,
      description: dataset.description,
      public: dataset.public,
      type: dataset.type,
      createdAt: dataset.createdAt,
      owner: dataset.owner,
    });
    console.log("Created dataset:", result[0].id, "for user:", userId);
    return c.json({
      data: result[0]
    }, 201);
  } catch (error) {
    console.error("Error creating dataset:", error);
    // Handle unique constraint violations
    if (error instanceof Error && error.message.includes('duplicate key')) {
      return c.json({
        error: "A dataset with this ID already exists"
      }, 400);
    }
    // Any other failure is reported as a generic server error
    return c.json(
      {
        error: "Failed to create dataset",
        details: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});
// ============================================================================
// CLUSTERS
// ============================================================================
/**
 * Protected API route listing the clusters that belong to one location
 *
 * @route GET /api/clusters
 * @authentication Required
 * @param {string} locationId - Required query parameter naming the location whose clusters are wanted
 * @returns {Object} Response containing:
 *   - data: Array of cluster objects, each carrying a `recordingPattern` entry
 * @error 400 - If locationId is missing
 * @error 500 - If the query throws
 * @description Clusters are read together with their cyclic recording pattern through a
 * single LEFT JOIN and are ordered by cluster name. `recordingPattern` is
 * `{ recordS, sleepS }` when both joined values are non-null, otherwise null.
 */
app.get("/api/clusters", authenticate, async (c) => {
  try {
    // The authenticated user's ID is only used for logging here
    const { sub: userId } = (c as unknown as { jwtPayload: JWTPayload }).jwtPayload;
    const locationId = c.req.query("locationId");
    console.log("Cluster API called with locationId:", locationId, "userId:", userId);

    if (!locationId) {
      console.log("Missing locationId in request");
      return c.json({ error: "Missing required query parameter: locationId" }, 400);
    }

    const db = drizzle(neon(c.env.DATABASE_URL));

    // One joined query instead of a pattern lookup per cluster
    console.log("Querying clusters for locationId:", locationId);
    const selectedColumns = {
      // Cluster fields
      id: cluster.id,
      datasetId: cluster.datasetId,
      locationId: cluster.locationId,
      name: cluster.name,
      description: cluster.description,
      createdBy: cluster.createdBy,
      createdAt: cluster.createdAt,
      lastModified: cluster.lastModified,
      modifiedBy: cluster.modifiedBy,
      active: cluster.active,
      timezoneId: cluster.timezoneId,
      cyclicRecordingPatternId: cluster.cyclicRecordingPatternId,
      sampleRate: cluster.sampleRate,
      // Recording pattern fields
      recordS: cyclicRecordingPattern.recordS,
      sleepS: cyclicRecordingPattern.sleepS,
    };
    const rows = await db
      .select(selectedColumns)
      .from(cluster)
      .leftJoin(
        cyclicRecordingPattern,
        eq(cluster.cyclicRecordingPatternId, cyclicRecordingPattern.id)
      )
      .where(eq(cluster.locationId, locationId))
      .orderBy(cluster.name);
    console.log("Found", rows.length, "clusters for location", locationId);

    // Fold the two flat pattern columns into a nested object (or null)
    const data = rows.map(({ recordS, sleepS, ...clusterFields }) => {
      const patternMissing = recordS === null || sleepS === null;
      return {
        ...clusterFields,
        recordingPattern: patternMissing ? null : { recordS, sleepS },
      };
    });

    return c.json({ data });
  } catch (error) {
    console.error("Error fetching clusters:", error);
    const details = error instanceof Error ? error.message : String(error);
    return c.json({ error: "Failed to fetch clusters", details }, 500);
  }
});
// ============================================================================
// FILES
// ============================================================================
/**
 * Protected API route to fetch audio files for a specific cluster
 *
 * @route GET /api/files
 * @authentication Required
 * @param {string} clusterId - Required query parameter specifying the cluster to fetch files from
 * @param {number} [page=1] - Optional page number for pagination (starts at 1)
 * @param {number} [pageSize=100] - Optional page size (clamped to 10-500; non-numeric values fall back to 100)
 * @param {string} [solarNight] - Optional filter for files recorded during solar night ('true'/'false')
 * @param {string} [civilNight] - Optional filter for files recorded during civil night ('true'/'false')
 * @param {string} [speciesId] - Optional filter for files with selections labeled with specific species
 * @returns {Object} Response containing:
 *   - data: Array of file objects with metadata, mothMetadata, and species information
 *   - pagination: Object with pagination metadata
 *   - filters: Object showing the filters that were applied
 * @error 400 - If clusterId is missing or page is not a number greater than 0
 * @error 500 - If a database query fails
 * @description Returns audio files for the specified cluster with comprehensive metadata:
 *   - Basic file information (name, path, timestamp, duration, etc.)
 *   - File metadata (JSON format)
 *   - Recording device metadata (gain, battery voltage, temperature)
 *   - Species found in each file
 *
 * When speciesId is provided, only returns files that have at least one
 * selection labeled with the specified species.
 */
app.get("/api/files", authenticate, async (c) => {
  try {
    // Get query parameters
    const clusterId = c.req.query("clusterId");
    const page = parseInt(c.req.query("page") || "1", 10);
    const requestedPageSize = parseInt(c.req.query("pageSize") || "100", 10);
    const solarNight = c.req.query("solarNight");
    const civilNight = c.req.query("civilNight");
    const speciesId = c.req.query("speciesId"); // Optional filter for species

    // Validate parameters
    if (!clusterId) {
      console.log("Missing clusterId in request");
      return c.json({
        error: "Missing required query parameter: clusterId"
      }, 400);
    }

    // NaN fails every comparison, so it has to be rejected explicitly
    if (Number.isNaN(page) || page < 1) {
      console.log("Invalid page number in request:", page);
      return c.json({
        error: "Invalid page parameter: must be greater than 0"
      }, 400);
    }

    // Clamp page size to prevent excessive queries; unparseable input uses the default
    const limitedPageSize = Number.isNaN(requestedPageSize)
      ? 100
      : Math.min(Math.max(requestedPageSize, 10), 500);
    const offset = (page - 1) * limitedPageSize;

    // Echo of the applied filters: 'true'/'false' become booleans, anything else null
    const toTriState = (value: string | undefined): boolean | null =>
      value === 'true' ? true : value === 'false' ? false : null;
    const filters = {
      solarNight: toTriState(solarNight),
      civilNight: toTriState(civilNight),
      speciesId: speciesId || null,
    };

    // Connect to the database
    const sql = neon(c.env.DATABASE_URL);
    const db = drizzle(sql);

    // Build filter conditions
    let whereConditions = sqlExpr`${file.clusterId} = ${clusterId} AND ${file.active} = true`;
    // A 'false' night filter also matches rows where the flag is NULL
    if (solarNight === 'true') {
      whereConditions = sqlExpr`${whereConditions} AND ${file.maybeSolarNight} = true`;
    } else if (solarNight === 'false') {
      whereConditions = sqlExpr`${whereConditions} AND (${file.maybeSolarNight} = false OR ${file.maybeSolarNight} IS NULL)`;
    }
    if (civilNight === 'true') {
      whereConditions = sqlExpr`${whereConditions} AND ${file.maybeCivilNight} = true`;
    } else if (civilNight === 'false') {
      whereConditions = sqlExpr`${whereConditions} AND (${file.maybeCivilNight} = false OR ${file.maybeCivilNight} IS NULL)`;
    }
    // The species condition is only valid in queries that join selection and label
    const speciesWhere = sqlExpr`${whereConditions} AND ${label.speciesId} = ${speciesId} AND ${label.active} = true`;

    // First, get the total count of files for pagination metadata
    const countResult = speciesId
      ? // Count only files that have at least one selection labeled with the species
        await db
          .select({
            count: sqlExpr<number>`COUNT(DISTINCT ${file.id})`
          })
          .from(file)
          .innerJoin(selection, eq(selection.fileId, file.id))
          .innerJoin(label, eq(label.selectionId, selection.id))
          .where(speciesWhere)
      : // Standard count without species filter
        await db
          .select({
            count: sqlExpr<number>`COUNT(1)`
          })
          .from(file)
          .where(whereConditions);
    const totalFiles = Number(countResult[0]?.count ?? 0);
    const totalPages = Math.ceil(totalFiles / limitedPageSize);
    const pagination = {
      currentPage: page,
      pageSize: limitedPageSize,
      totalPages: totalPages,
      totalItems: totalFiles,
      hasNextPage: page < totalPages,
      hasPreviousPage: page > 1,
    };

    // Columns returned for every file, shared by both query variants
    const fileColumns = {
      id: file.id,
      fileName: file.fileName,
      path: file.path,
      timestampLocal: file.timestampLocal,
      duration: file.duration,
      sampleRate: file.sampleRate,
      locationId: file.locationId,
      description: file.description,
      maybeSolarNight: file.maybeSolarNight,
      maybeCivilNight: file.maybeCivilNight,
      moonPhase: file.moonPhase,
    };

    // Query files for the specified cluster with pagination
    const filesResult = speciesId
      ? // The joins yield one row per matching label, so group back to one row per file
        await db
          .select(fileColumns)
          .from(file)
          .innerJoin(selection, eq(selection.fileId, file.id))
          .innerJoin(label, eq(label.selectionId, selection.id))
          .where(speciesWhere)
          .orderBy(file.timestampLocal)
          .groupBy(file.id, file.fileName, file.path, file.timestampLocal, file.duration,
            file.sampleRate, file.locationId, file.description, file.maybeSolarNight,
            file.maybeCivilNight, file.moonPhase)
          .limit(limitedPageSize)
          .offset(offset)
      : // Standard query without species filter
        await db
          .select(fileColumns)
          .from(file)
          .where(whereConditions)
          .orderBy(file.timestampLocal)
          .limit(limitedPageSize)
          .offset(offset);

    // Early return if no files found; this also keeps the IN (...) lists below non-empty
    if (filesResult.length === 0) {
      return c.json({
        data: [],
        pagination,
        filters
      });
    }

    // Bind every file ID as a query parameter instead of splicing quoted text into the SQL
    const fileIds = filesResult.map(f => f.id);
    const fileIdList = sqlExpr.join(fileIds.map(id => sqlExpr`${id}`), sqlExpr`, `);

    // Execute metadata queries in parallel using Promise.all
    const [metadataResults, mothMetadataResults, speciesResults] = await Promise.all([
      // Fetch file metadata
      db.select({
        fileId: fileMetadata.fileId,
        json: fileMetadata.json
      })
        .from(fileMetadata)
        .where(sqlExpr`${fileMetadata.fileId} IN (${fileIdList})`),
      // Fetch moth metadata
      db.select({
        fileId: mothMetadata.fileId,
        gain: mothMetadata.gain,
        batteryV: mothMetadata.batteryV,
        tempC: mothMetadata.tempC
      })
        .from(mothMetadata)
        .where(sqlExpr`${mothMetadata.fileId} IN (${fileIdList})`),
      // Fetch species data via selections and labels
      db.select({
        fileId: selection.fileId,
        speciesId: species.id,
        speciesLabel: species.label,
        ebirdCode: species.ebirdCode,
        description: species.description
      })
        .from(selection)
        .innerJoin(label, eq(label.selectionId, selection.id))
        .innerJoin(species, eq(species.id, label.speciesId))
        .where(
          speciesId
            ? sqlExpr`${selection.fileId} IN (${fileIdList}) AND ${label.active} = true AND ${label.speciesId} = ${speciesId}`
            : sqlExpr`${selection.fileId} IN (${fileIdList}) AND ${label.active} = true`
        )
    ]);

    // Metadata may arrive as a JSON string, possibly with escaped quotes; decode it when
    // it looks like JSON and fall back to the raw value if parsing fails
    const parseMetadataJson = (raw: unknown): unknown => {
      if (typeof raw !== 'string') {
        return raw;
      }
      try {
        if (raw.startsWith('{') || raw.startsWith('[')) {
          return JSON.parse(raw);
        }
        if (raw.includes('\\"')) {
          return JSON.parse(raw.replace(/\\"/g, '"'));
        }
      } catch (e) {
        console.error("Error processing metadata JSON:", e);
      }
      return raw;
    };

    // Process metadata results
    const metadataMap: Record<string, unknown> = {};
    for (const item of metadataResults) {
      metadataMap[item.fileId] = parseMetadataJson(item.json);
    }

    // Process moth metadata; battery voltage and temperature are coerced with Number()
    const mothMetadataMap: Record<string, {
      gain: string | null;
      batteryV: number | null;
      tempC: number | null;
    }> = {};
    for (const item of mothMetadataResults) {
      mothMetadataMap[item.fileId] = {
        gain: item.gain,
        batteryV: item.batteryV !== null ? Number(item.batteryV) : null,
        tempC: item.tempC !== null ? Number(item.tempC) : null
      };
    }

    // Process species data, keeping each species at most once per file
    const speciesByFile = new Map<string, Array<{
      id: string;
      label: string;
      ebirdCode: string | null;
      description: string | null;
    }>>();
    for (const item of speciesResults) {
      const known = speciesByFile.get(item.fileId) ?? [];
      if (!known.some(s => s.id === item.speciesId)) {
        known.push({
          id: item.speciesId,
          label: item.speciesLabel,
          ebirdCode: item.ebirdCode,
          description: item.description
        });
      }
      speciesByFile.set(item.fileId, known);
    }

    // Combine file data with metadata
    const files = filesResult.map(fileData => ({
      ...fileData,
      // `||` is deliberate: any falsy metadata value is reported as null
      metadata: metadataMap[fileData.id] || null,
      mothMetadata: mothMetadataMap[fileData.id] ?? null,
      species: speciesByFile.get(fileData.id) ?? []
    }));

    // Return paginated results with metadata and filters
    return c.json({
      data: files,
      pagination,
      filters
    });
  } catch (error) {
    console.error("Error fetching files:", error);
    return c.json(
      {
        error: "Failed to fetch files",
        details: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});
/**
 * Protected API route to fetch species and call types for a dataset
 *
 * @route GET /api/species
 * @authentication Required
 * @param {string} datasetId - Required query parameter specifying the dataset to fetch species from
 * @returns {Object} Response containing:
 *   - data: Array of species objects, each with an array of associated call types
 * @error 400 - If datasetId is missing
 * @error 500 - If the database query fails
 * @description Returns species associated with the specified dataset along with their call types
 * Each species object includes:
 *   - id, label, ebirdCode, description
 *   - callTypes: Array of call type objects with id and label
 *
 * Uses an efficient JOIN approach to fetch data in a single query to avoid N+1 query problems
 * Results are transformed to provide a nested structure with call types inside species objects
 */
app.get("/api/species", authenticate, async (c) => {
  try {
    // Get query parameter for datasetId
    const datasetId = c.req.query("datasetId");

    // Validate parameters
    if (!datasetId) {
      return c.json({
        error: "Missing required query parameter: datasetId"
      }, 400);
    }

    // Connect to the database
    const sql = neon(c.env.DATABASE_URL);
    const db = drizzle(sql);

    // Use JOIN approach to get species and call types in a single query
    // This eliminates the N+1 query problem and reduces round trips to the database
    const queryResults = await db
      .select({
        id: species.id,
        label: species.label,
        ebirdCode: species.ebirdCode,
        description: species.description,
        callTypeId: callType.id,
        callTypeLabel: callType.label
      })
      .from(species)
      .innerJoin(
        speciesDataset,
        eq(species.id, speciesDataset.speciesId)
      )
      // LEFT JOIN keeps species that have no active call types
      .leftJoin(
        callType,
        sqlExpr`${callType.speciesId} = ${species.id} AND ${callType.active} = true`
      )
      .where(eq(speciesDataset.datasetId, datasetId))
      .orderBy(species.label);

    // Transform the flat results into nested objects, one entry per species.
    // A Map iterates in insertion order, so the query's label ordering is kept.
    const speciesById = new Map<string, {
      id: string;
      label: string;
      ebirdCode: string | null;
      description: string | null;
      callTypes: Array<{ id: string; label: string | null }>;
    }>();
    for (const row of queryResults) {
      let entry = speciesById.get(row.id);
      if (!entry) {
        // Create a new species entry if not already in the map
        entry = {
          id: row.id,
          label: row.label,
          ebirdCode: row.ebirdCode,
          description: row.description,
          callTypes: []
        };
        speciesById.set(row.id, entry);
      }
      // Add the call type if it exists and isn't already in the array
      const callTypeId = row.callTypeId;
      if (callTypeId && !entry.callTypes.some(ct => ct.id === callTypeId)) {
        entry.callTypes.push({
          id: callTypeId,
          label: row.callTypeLabel
        });
      }
    }

    // Return the enriched species data (an empty array when nothing matched)
    return c.json({
      data: Array.from(speciesById.values())
    });
  } catch (error) {
    console.error("Error fetching species:", error);
    return c.json(
      {
        error: "Failed to fetch species",
        details: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});
/**
* @param {string} id - Dataset ID in URL path
* @body {Object} Dataset update payload:
* - name?: string (optional, max 255 chars)
* - description?: string (optional, max 255 chars)
* - public?: boolean (optional)
* - type?: string (optional)
* - active?: boolean (optional, for soft delete)
* @param {string} q - Search query (common name, scientific name, family, or species code)
* - data: The updated dataset object
* @error 400 - If fields are invalid or dataset not found
* @error 403 - If user doesn't own the dataset
* @error 500 - If database operation fails
* @description Updates an existing dataset owned by the authenticated user
* Only the dataset owner can modify it
* - data: Array of matching eBird taxonomy entries
* @description Searches the eBird taxonomy v2024 materialized view for species matching the query.
* Searches across primary_com_name, sci_name, family, and species_code fields.
// Get the JWT payload (user info)
const jwtPayload = (c as unknown as { jwtPayload: JWTPayload }).jwtPayload;
const userId = jwtPayload.sub; // User ID from JWT
// Get dataset ID from URL parameters
const datasetId = c.req.param("id");
const query = c.req.query("q");
// First, check if the dataset exists and if the user owns it
const existingDataset = await db
.select({
id: dataset.id,
owner: dataset.owner,
active: dataset.active
})
.from(dataset)
.where(eq(dataset.id, datasetId))
.limit(1);
// Search across multiple fields with case-insensitive partial matching
const searchTerm = `%${query.trim().toLowerCase()}%`;
// Use raw SQL query for materialized view until Drizzle typing issue is resolved
const results = await db.execute(
sqlExpr`
SELECT
id,
species_code as "speciesCode",
primary_com_name as "primaryComName",
sci_name as "sciName",
bird_order as "birdOrder",
family
FROM ebird_taxonomy_v2024
WHERE
LOWER(primary_com_name) LIKE ${searchTerm} OR
LOWER(sci_name) LIKE ${searchTerm} OR
LOWER(family) LIKE ${searchTerm} OR
LOWER(species_code) LIKE ${searchTerm}
ORDER BY primary_com_name
LIMIT 20
`
);
if (existingDataset.length === 0) {
return c.json({
error: "Dataset not found"
}, 404);
}
return c.json({
data: results.rows || results
});
} catch (error) {
console.error("Error searching eBird taxonomy:", error);
return c.json(
{
error: "Failed to search eBird taxonomy",
details: error instanceof Error ? error.message : String(error),
},
500
);
}
});
// Check if user has permission to edit this dataset
const hasEditPermission = await checkUserPermission(db, userId, datasetId, 'EDIT');
// ============================================================================
// SELECTION
// ============================================================================
/**
* Protected API route to fetch files with selections for a specific dataset and species
*
* @route GET /api/selection
* @authentication Required
* @param {string} datasetId - Required query parameter specifying the dataset
* @param {string} speciesId - Required query parameter specifying the species to filter by
* @param {number} [page=1] - Optional page number for pagination (starts at 1)
* @param {number} [pageSize=100] - Optional page size (10-500, defaults to 100)
* @param {string} [solarNight] - Optional filter for files recorded during solar night ('true'/'false')
* @param {string} [civilNight] - Optional filter for files recorded during civil night ('true'/'false')
* @returns {Object} Response containing:
* - data: Array of file objects with metadata, mothMetadata, and species information
* - pagination: Object with pagination metadata
* - filters: Object showing the filters that were applied
* @error 400 - If datasetId or speciesId is missing or page is invalid
* @description Returns files that contain selections labeled with the specified species
* Designed for cross-cluster searches within a dataset
*
* Each file object includes:
* - Basic file information (name, path, timestamp, duration, etc.)
* - File metadata (JSON format)
* - Recording device metadata (gain, battery voltage, temperature)
* - Species information
*
* Uses efficient query optimization with:
* - Parallel Promise.all for metadata queries
* - Proper SQL JOINs to avoid N+1 query problems
* - Data transformation for optimal client-side consumption
*/
app.get("/api/selection", authenticate, async (c) => {
try {
// Get query parameters
const datasetId = c.req.query("datasetId");
const speciesId = c.req.query("speciesId");
const page = parseInt(c.req.query("page") || "1", 10);
const pageSize = parseInt(c.req.query("pageSize") || "100", 10);
const solarNight = c.req.query("solarNight");
const civilNight = c.req.query("civilNight");
// Validate fields if provided
if (name !== undefined) {
if (typeof name !== 'string' || name.trim().length === 0) {
return c.json({
error: "Invalid field: name must be a non-empty string"
}, 400);
}
if (name.length > 255) {
return c.json({
error: "Field 'name' must be 255 characters or less"
}, 400);
}
if (!speciesId) {
return c.json({
error: "Missing required query parameter: speciesId"
}, 400);
if (description !== undefined && description !== null && description.length > 255) {
// Validate and limit page size to prevent excessive queries
const limitedPageSize = Math.min(Math.max(pageSize, 10), 500);
const offset = (page - 1) * limitedPageSize;
// Validate page number
if (page < 1) {
if (type !== undefined) {
const validTypes = ['organise', 'test', 'train'];
if (!validTypes.includes(type)) {
return c.json({
error: `Field 'type' must be one of: ${validTypes.join(', ')}`
}, 400);
}
}
// Build update object with only provided fields
const updateData: Record<string, unknown> = {
lastModified: new Date(),
modifiedBy: userId,
};
if (name !== undefined) {
updateData.name = name.trim();
}
// Connect to the database
const sql = neon(c.env.DATABASE_URL);
const db = drizzle(sql);
if (description !== undefined) {
updateData.description = description?.trim() || null;
}
// Build base filter condition - active files with selections of the specified species
// This forms the core of our WHERE clause for both queries
let baseCondition = sqlExpr`
${file.active} = true
AND ${location.datasetId} = ${datasetId}
AND ${label.speciesId} = ${speciesId}
AND ${label.active} = true
`;
if (isPublic !== undefined) {
updateData.public = Boolean(isPublic);
// Add filters for day/night if specified
if (solarNight === 'true') {
baseCondition = sqlExpr`${baseCondition} AND ${file.maybeSolarNight} = true`;
} else if (solarNight === 'false') {
baseCondition = sqlExpr`${baseCondition} AND (${file.maybeSolarNight} = false OR ${file.maybeSolarNight} IS NULL)`;
if (type !== undefined) {
updateData.type = type;
if (civilNight === 'true') {
baseCondition = sqlExpr`${baseCondition} AND ${file.maybeCivilNight} = true`;
} else if (civilNight === 'false') {
baseCondition = sqlExpr`${baseCondition} AND (${file.maybeCivilNight} = false OR ${file.maybeCivilNight} IS NULL)`;
if (active !== undefined) {
updateData.active = Boolean(active);
// Get total count for pagination using a more efficient COUNT(1)
const countResult = await db
.select({
count: sqlExpr<number>`COUNT(DISTINCT ${file.id})`
})
.from(file)
.innerJoin(cluster, eq(file.clusterId, cluster.id))
.innerJoin(location, eq(cluster.locationId, location.id))
.innerJoin(selection, eq(selection.fileId, file.id))
.innerJoin(label, eq(label.selectionId, selection.id))
.where(baseCondition);
const totalFiles = Number(countResult[0].count);
const totalPages = Math.ceil(totalFiles / limitedPageSize);
// Early return if there are no matching files
if (totalFiles === 0) {
return c.json({
data: [],
pagination: {
currentPage: page,
pageSize: limitedPageSize,
totalPages: 0,
totalItems: 0,
hasNextPage: false,
hasPreviousPage: false,
},
filters: {
datasetId,
speciesId,
solarNight: solarNight === 'true' ? true : solarNight === 'false' ? false : null,
civilNight: civilNight === 'true' ? true : civilNight === 'false' ? false : null,
}
});
// Update the dataset
const result = await db
.update(dataset)
.set(updateData)
.where(eq(dataset.id, datasetId))
.returning({
id: dataset.id,
name: dataset.name,
description: dataset.description,
public: dataset.public,
type: dataset.type,
createdAt: dataset.createdAt,
lastModified: dataset.lastModified,
owner: dataset.owner,
active: dataset.active,
// Fetch files with pagination
const filesResult = await db
.select({
id: file.id,
fileName: file.fileName,
path: file.path,
timestampLocal: file.timestampLocal,
duration: file.duration,
sampleRate: file.sampleRate,
locationId: file.locationId,
clusterId: file.clusterId,
description: file.description,
maybeSolarNight: file.maybeSolarNight,
maybeCivilNight: file.maybeCivilNight,
moonPhase: file.moonPhase,
})
.from(file)
.innerJoin(cluster, eq(file.clusterId, cluster.id))
.innerJoin(location, eq(cluster.locationId, location.id))
.innerJoin(selection, eq(selection.fileId, file.id))
.innerJoin(label, eq(label.selectionId, selection.id))
.where(baseCondition)
.orderBy(file.timestampLocal)
.groupBy(file.id, file.fileName, file.path, file.timestampLocal, file.duration,
file.sampleRate, file.locationId, file.clusterId, file.description, file.maybeSolarNight,
file.maybeCivilNight, file.moonPhase)
.limit(limitedPageSize)
.offset(offset);
// Get all file IDs for metadata queries
const fileIds = filesResult.map(f => f.id);
// Safety check - shouldn't happen with our early return, but just in case
if (fileIds.length === 0) {
return c.json({
data: [],
pagination: {
currentPage: page,
pageSize: limitedPageSize,
totalPages: totalPages,
totalItems: totalFiles,
hasNextPage: page < totalPages,
hasPreviousPage: page > 1,
},
filters: {
datasetId,
speciesId,
solarNight: solarNight === 'true' ? true : solarNight === 'false' ? false : null,
civilNight: civilNight === 'true' ? true : civilNight === 'false' ? false : null,
}
console.log("Updated dataset:", result[0].id, "for user:", userId);
// Bind every file ID as its own query parameter rather than splicing
// hand-quoted strings into raw SQL, so an ID containing a quote character can
// neither break nor alter the generated statement.
// The empty-page case is handled by the early return above, so this list is
// never empty when it reaches the IN (...) clauses below.
const fileIdList = sqlExpr.join(
  fileIds.map(id => sqlExpr`${id}`),
  sqlExpr`, `
);
// Run the three independent lookups concurrently; each is restricted to the
// files on the current page.
const [metadataResults, mothMetadataResults, speciesResults] = await Promise.all([
  // Free-form JSON metadata per file
  db.select({
    fileId: fileMetadata.fileId,
    json: fileMetadata.json
  })
    .from(fileMetadata)
    .where(sqlExpr`${fileMetadata.fileId} IN (${fileIdList})`),
  // Recorder readings (gain, battery voltage, temperature) per file
  db.select({
    fileId: mothMetadata.fileId,
    gain: mothMetadata.gain,
    batteryV: mothMetadata.batteryV,
    tempC: mothMetadata.tempC
  })
    .from(mothMetadata)
    .where(sqlExpr`${mothMetadata.fileId} IN (${fileIdList})`),
  // Species rows reached through active labels, limited to the requested species
  db.select({
    fileId: selection.fileId,
    speciesId: species.id,
    speciesLabel: species.label,
    ebirdCode: species.ebirdCode,
    description: species.description
  })
    .from(selection)
    .innerJoin(label, eq(label.selectionId, selection.id))
    .innerJoin(species, eq(species.id, label.speciesId))
    .where(sqlExpr`
      ${selection.fileId} IN (${fileIdList})
      AND ${label.speciesId} = ${speciesId}
      AND ${label.active} = true
    `)
]);
// Index file metadata by file ID. String payloads that look like JSON (or
// contain escaped quotes) are parsed; anything else is stored untouched.
const metadataMap: Record<string, unknown> = {};
for (const row of metadataResults) {
  const raw = row.json;
  let parsed: unknown = raw;
  if (typeof raw === 'string') {
    try {
      if (raw.startsWith('{') || raw.startsWith('[')) {
        // Plain JSON object/array text
        parsed = JSON.parse(raw);
      } else if (raw.includes('\\"')) {
        // Quotes were backslash-escaped: strip the escapes, then parse
        parsed = JSON.parse(raw.replace(/\\"/g, '"'));
      }
    } catch (e) {
      // Unparseable payloads fall back to the raw stored value
      console.error("Error processing metadata JSON:", e);
      parsed = raw;
    }
  }
  metadataMap[row.fileId] = parsed;
}
// Index recorder readings by file ID, converting the non-null battery voltage
// and temperature values to numbers.
const mothMetadataMap: Record<string, {
  gain: string | null;
  batteryV: number | null;
  tempC: number | null;
}> = {};
for (const row of mothMetadataResults) {
  const batteryV = row.batteryV === null ? null : Number(row.batteryV);
  const tempC = row.tempC === null ? null : Number(row.tempC);
  mothMetadataMap[row.fileId] = { gain: row.gain, batteryV, tempC };
}
// Group species rows by file ID, keeping each species at most once per file.
const speciesMap = new Map<string, Array<{
  id: string;
  label: string;
  ebirdCode: string | null;
  description: string | null;
}>>();
for (const row of speciesResults) {
  // Create the per-file bucket on first sight of this file
  let speciesForFile = speciesMap.get(row.fileId);
  if (speciesForFile === undefined) {
    speciesForFile = [];
    speciesMap.set(row.fileId, speciesForFile);
  }
  // Several selections in one file may carry the same species; skip repeats
  const alreadyListed = speciesForFile.some(entry => entry.id === row.speciesId);
  if (alreadyListed) {
    continue;
  }
  speciesForFile.push({
    id: row.speciesId,
    label: row.speciesLabel,
    ebirdCode: row.ebirdCode,
    description: row.description
  });
}
// Attach the looked-up metadata, recorder readings and species list to each
// file row; missing entries become null (or an empty species list).
const files = filesResult.map(fileData => {
  const { id } = fileData;
  const metadata = metadataMap[id] || null;
  const mothMetadata = mothMetadataMap[id] || null;
  const species = speciesMap.get(id) || [];
  return { ...fileData, metadata, mothMetadata, species };
});
// Return paginated results with metadata and filters
data: result[0]
data: files,
pagination: {
currentPage: page,
pageSize: limitedPageSize,
totalPages: totalPages,
totalItems: totalFiles,
hasNextPage: page < totalPages,
hasPreviousPage: page > 1,
},
filters: {
datasetId,
speciesId,
solarNight: solarNight === 'true' ? true : solarNight === 'false' ? false : null,
civilNight: civilNight === 'true' ? true : civilNight === 'false' ? false : null,
}