app.get("/api/", (c) => c.json({ name: "Cloudflare" }));
// API routes
app.get("/api", (c) => c.json({ status: "ok" }));
// Datasets API route
app.get("/api/datasets", async (c) => {
try {
const sql = neon(c.env.DATABASE_URL);
const db = drizzle(sql);
const results = await db.select().from(dataset);
return c.json({
data: results,
});
} catch (error) {
console.error("Error fetching datasets:", error);
return c.json(
{
error: "Failed to fetch datasets",
details: error instanceof Error ? error.message : String(error),
},
500
);
}
});
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="35.93" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 228"><path fill="#00D8FF" d="M210.483 73.824a171.49 171.49 0 0 0-8.24-2.597c.465-1.9.893-3.777 1.273-5.621c6.238-30.281 2.16-54.676-11.769-62.708c-13.355-7.7-35.196.329-57.254 19.526a171.23 171.23 0 0 0-6.375 5.848a155.866 155.866 0 0 0-4.241-3.917C100.759 3.829 77.587-4.822 63.673 3.233C50.33 10.957 46.379 33.89 51.995 62.588a170.974 170.974 0 0 0 1.892 8.48c-3.28.932-6.445 1.924-9.474 2.98C17.309 83.498 0 98.307 0 113.668c0 15.865 18.582 31.778 46.812 41.427a145.52 145.52 0 0 0 6.921 2.165a167.467 167.467 0 0 0-2.01 9.138c-5.354 28.2-1.173 50.591 12.134 58.266c13.744 7.926 36.812-.22 59.273-19.855a145.567 145.567 0 0 0 5.342-4.923a168.064 168.064 0 0 0 6.92 6.314c21.758 18.722 43.246 26.282 56.54 18.586c13.731-7.949 18.194-32.003 12.4-61.268a145.016 145.016 0 0 0-1.535-6.842c1.62-.48 3.21-.974 4.76-1.488c29.348-9.723 48.443-25.443 48.443-41.52c0-15.417-17.868-30.326-45.517-39.844Zm-6.365 70.984c-1.4.463-2.836.91-4.3 1.345c-3.24-10.257-7.612-21.163-12.963-32.432c5.106-11 9.31-21.767 12.459-31.957c2.619.758 5.16 1.557 7.61 2.4c23.69 8.156 38.14 20.213 38.14 29.504c0 9.896-15.606 22.743-40.946 31.14Zm-10.514 20.834c2.562 12.94 2.927 24.64 1.23 33.787c-1.524 8.219-4.59 13.698-8.382 15.893c-8.067 4.67-25.32-1.4-43.927-17.412a156.726 156.726 0 0 1-6.437-5.87c7.214-7.889 14.423-17.06 21.459-27.246c12.376-1.098 24.068-2.894 34.671-5.345a134.17 134.17 0 0 1 1.386 6.193ZM87.276 214.515c-7.882 2.783-14.16 2.863-17.955.675c-8.075-4.657-11.432-22.636-6.853-46.752a156.923 156.923 0 0 1 1.869-8.499c10.486 2.32 22.093 3.988 34.498 4.994c7.084 9.967 14.501 19.128 21.976 27.15a134.668 134.668 0 0 1-4.877 4.492c-9.933 8.682-19.886 14.842-28.658 17.94ZM50.35 144.747c-12.483-4.267-22.792-9.812-29.858-15.863c-6.35-5.437-9.555-10.836-9.555-15.216c0-9.322 13.897-21.212 37.076-29.293c2.813-.98 5.757-1.905 8.812-2.773c3.204 10.42 7.406 21.315 12.477 32.332c-5.137 11.18-9.399 22.249-12.634 32.792a134.718 134.718 0 0 1-6.318-1.979Zm12.378-84.26c-4.811-24.587-1.616-43.134 6.425-47.789c8.564-4.958 27.502 2.111 47.463 19.835a144.318 144.318 0 0 1 3.841 3.545c-7.438 7.987-14.787 17.08-21.808 26.988c-12.04 1.116-23.565 2.908-34.161 5.309a160.342 160.342 0 0 1-1.76-7.887Zm110.427 27.268a347.8 347.8 0 0 0-7.785-12.803c8.168 1.033 15.994 2.404 23.343 4.08c-2.206 7.072-4.956 14.465-8.193 22.045a381.151 381.151 0 0 0-7.365-13.322Zm-45.032-43.861c5.044 5.465 10.096 11.566 15.065 18.186a322.04 322.04 0 0 0-30.257-.006c4.974-6.559 10.069-12.652 15.192-18.18ZM82.802 87.83a323.167 323.167 0 0 0-7.227 13.238c-3.184-7.553-5.909-14.98-8.134-22.152c7.304-1.634 15.093-2.97 23.209-3.984a321.524 321.524 0 0 0-7.848 12.897Zm8.081 65.352c-8.385-.936-16.291-2.203-23.593-3.793c2.26-7.3 5.045-14.885 8.298-22.6a321.187 321.187 0 0 0 7.257 13.246c2.594 4.48 5.28 8.868 8.038 13.147Zm37.542 31.03c-5.184-5.592-10.354-11.779-15.403-18.433c4.902.192 9.899.29 14.978.29c5.218 0 10.376-.117 15.453-.343c-4.985 6.774-10.018 12.97-15.028 18.486Zm52.198-57.817c3.422 7.8 6.306 15.345 8.596 22.52c-7.422 1.694-15.436 3.058-23.88 4.071a382.417 382.417 0 0 0 7.859-13.026a347.403 347.403 0 0 0 7.425-13.565Zm-16.898 8.101a358.557 358.557 0 0 1-12.281 19.815a329.4 329.4 0 0 1-23.444.823c-7.967 0-15.716-.248-23.178-.732a310.202 310.202 0 0 1-12.513-19.846h.001a307.41 307.41 0 0 1-10.923-20.627a310.278 310.278 0 0 1 10.89-20.637l-.001.001a307.318 
307.318 0 0 1 12.413-19.761c7.613-.576 15.42-.876 23.31-.876H128c7.926 0 15.743.303 23.354.883a329.357 329.357 0 0 1 12.335 19.695a358.489 358.489 0 0 1 11.036 20.54a329.472 329.472 0 0 1-11 20.722Zm22.56-122.124c8.572 4.944 11.906 24.881 6.52 51.026c-.344 1.668-.73 3.367-1.15 5.09c-10.622-2.452-22.155-4.275-34.23-5.408c-7.034-10.017-14.323-19.124-21.64-27.008a160.789 160.789 0 0 1 5.888-5.4c18.9-16.447 36.564-22.941 44.612-18.3ZM128 90.808c12.625 0 22.86 10.235 22.86 22.86s-10.235 22.86-22.86 22.86s-22.86-10.235-22.86-22.86s10.235-22.86 22.86-22.86Z"></path></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 76 98">
<path fill="url(#a)" d="m11 25 7 9s9-18 22-34c17 20 36 48 36 64 0 20-19 34-37 34C17 98 0 81 0 61c0-6 3-24 11-36Z"/>
<path fill="#F95" d="M39 21c47 51 14 66 0 66-11 0-51-11 0-66Z"/>
<defs>
<linearGradient id="a" x2="0%" y2="100%">
<stop stop-color="#F84"/>
<stop offset="100%" stop-color="#F30"/>
</linearGradient>
</defs>
</svg>
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 21.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
viewBox="0 0 822.8 355.5" style="enable-background:new 0 0 822.8 355.5;" xml:space="preserve">
<style type="text/css">
.st0{fill:#FFFFFF;}
.st1{fill:#FBAE40;}
.st2{fill:#F58220;}
</style>
<g id="Page-1">
<path id="CLOUDFLARE-_xAE_" class="st0" d="M772.2,252.6c-3.4,0-6.1-2.7-6.1-6.1c0-3.3,2.7-6.1,6.1-6.1c3.3,0,6.1,2.7,6.1,6.1
C778.3,249.8,775.5,252.6,772.2,252.6L772.2,252.6z M772.2,241.6c-2.7,0-4.9,2.2-4.9,4.9s2.2,4.9,4.9,4.9c2.7,0,4.9-2.2,4.9-4.9
S774.9,241.6,772.2,241.6L772.2,241.6z M775.3,249.7h-1.4l-1.2-2.3h-1.6v2.3h-1.3V243h3.2c1.4,0,2.3,0.9,2.3,2.2c0,1-0.6,1.7-1.4,2
L775.3,249.7z M772.9,246.2c0.5,0,1-0.3,1-1c0-0.8-0.4-1-1-1h-2v2H772.9z M136.7,239.8h15.6v42.5h27.1v13.6h-42.7V239.8z
M195.5,268v-0.2c0-16.1,13-29.2,30.3-29.2s30.1,12.9,30.1,29v0.2c0,16.1-13,29.2-30.3,29.2S195.5,284.1,195.5,268z M240.1,268
v-0.2c0-8.1-5.8-15.1-14.4-15.1c-8.5,0-14.2,6.9-14.2,15v0.2c0,8.1,5.8,15.1,14.3,15.1C234.4,283,240.1,276.1,240.1,268z
M275,271.3v-31.5h15.8V271c0,8.1,4.1,11.9,10.3,11.9c6.2,0,10.3-3.7,10.3-11.5v-31.6h15.8v31.1c0,18.1-10.3,26-26.3,26
C285,296.9,275,288.9,275,271.3z M351,239.8h21.6c20,0,31.7,11.5,31.7,27.7v0.2c0,16.2-11.8,28.2-32,28.2H351V239.8z M372.9,282.1
c9.3,0,15.5-5.1,15.5-14.2v-0.2c0-9-6.2-14.2-15.5-14.2h-6.3V282L372.9,282.1L372.9,282.1z M426.9,239.8h44.9v13.6h-29.4v9.6H469
v12.9h-26.6v20h-15.5V239.8z M493.4,239.8h15.5v42.5h27.2v13.6h-42.7V239.8z M576.7,239.4h15l23.9,56.5h-16.7l-4.1-10h-21.6l-4,10
h-16.3L576.7,239.4z M590.4,273.8l-6.2-15.9l-6.3,15.9H590.4z M635.6,239.8h26.5c8.6,0,14.5,2.2,18.3,6.1c3.3,3.2,5,7.5,5,13.1v0.2
c0,8.6-4.6,14.3-11.5,17.2l13.4,19.6h-18L658,279h-6.8v17h-15.6V239.8z M661.4,266.7c5.3,0,8.3-2.6,8.3-6.6v-0.2
c0-4.4-3.2-6.6-8.4-6.6h-10.2v13.4H661.4z M707.8,239.8h45.1V253h-29.7v8.5h26.9v12.3h-26.9v8.9h30.1v13.2h-45.5V239.8z
M102.7,274.6c-2.2,4.9-6.8,8.4-12.8,8.4c-8.5,0-14.3-7.1-14.3-15.1v-0.2c0-8.1,5.7-15,14.2-15c6.4,0,11.3,3.9,13.3,9.3h16.4
c-2.6-13.4-14.4-23.3-29.6-23.3c-17.3,0-30.3,13.1-30.3,29.2v0.2c0,16.1,12.8,29,30.1,29c14.8,0,26.4-9.6,29.4-22.4L102.7,274.6z"
/>
<path id="flare" class="st0" d="M734.5,150.4l-40.7-24.7c-0.6-0.1-4.4,0.3-6.4-0.7c-1.4-0.7-2.5-1.9-3.2-4c-3.2,0-175.5,0-175.5,0
v91.8h225.8V150.4z"/>
<path id="right-cloud" class="st1" d="M692.2,125.8c-0.8,0-1.5,0.6-1.8,1.4l-4.8,16.7c-2.1,7.2-1.3,13.8,2.2,18.7
c3.2,4.5,8.6,7.1,15.1,7.4l26.2,1.6c0.8,0,1.5,0.4,1.9,1c0.4,0.6,0.5,1.5,0.3,2.2c-0.4,1.2-1.6,2.1-2.9,2.2l-27.3,1.6
c-14.8,0.7-30.7,12.6-36.3,27.2l-2,5.1c-0.4,1,0.3,2,1.4,2H758c1.1,0,2.1-0.7,2.4-1.8c1.6-5.8,2.5-11.9,2.5-18.2
c0-37-30.2-67.2-67.3-67.2C694.5,125.7,693.3,125.7,692.2,125.8z"/>
<path id="left-cloud" class="st2" d="M656.4,204.6c2.1-7.2,1.3-13.8-2.2-18.7c-3.2-4.5-8.6-7.1-15.1-7.4L516,176.9
c-0.8,0-1.5-0.4-1.9-1c-0.4-0.6-0.5-1.4-0.3-2.2c0.4-1.2,1.6-2.1,2.9-2.2l124.2-1.6c14.7-0.7,30.7-12.6,36.3-27.2l7.1-18.5
c0.3-0.8,0.4-1.6,0.2-2.4c-8-36.2-40.3-63.2-78.9-63.2c-35.6,0-65.8,23-76.6,54.9c-7-5.2-15.9-8-25.5-7.1
c-17.1,1.7-30.8,15.4-32.5,32.5c-0.4,4.4-0.1,8.7,0.9,12.7c-27.9,0.8-50.2,23.6-50.2,51.7c0,2.5,0.2,5,0.5,7.5
c0.2,1.2,1.2,2.1,2.4,2.1h227.2c1.3,0,2.5-0.9,2.9-2.2L656.4,204.6z"/>
</g>
</svg>
const [name, setName] = useState("unknown");
const [datasets, setDatasets] = useState<string>("No data loaded");
const [loading, setLoading] = useState<boolean>(false);
const [error, setError] = useState<string | null>(null);

const fetchDatasets = async () => {
  setLoading(true);
  setError(null);
  try {
    const response = await fetch("/api/datasets");
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }
    const data = await response.json() as DatasetsResponse;
    setDatasets(JSON.stringify(data, null, 2));
  } catch (err) {
    setError(err instanceof Error ? err.message : "Failed to fetch datasets");
    console.error("Error fetching datasets:", err);
  } finally {
    setLoading(false);
  }
};
<div>
  <a href="https://vite.dev" target="_blank">
    <img src={viteLogo} className="logo" alt="Vite logo" />
  </a>
  <a href="https://react.dev" target="_blank">
    <img src={reactLogo} className="logo react" alt="React logo" />
  </a>
  <a href="https://hono.dev/" target="_blank">
    <img src={honoLogo} className="logo cloudflare" alt="Hono logo" />
  </a>
  <a href="https://workers.cloudflare.com/" target="_blank">
    <img
      src={cloudflareLogo}
      className="logo cloudflare"
      alt="Cloudflare logo"
    />
  </a>
</div>
<h1>Vite + React + Hono + Cloudflare</h1>
<button
  onClick={() => {
    fetch("/api/")
      .then((res) => res.json() as Promise<{ name: string }>)
      .then((data) => setName(data.name));
  }}
  aria-label="get name"
<h2>Datasets</h2>
<button
  onClick={fetchDatasets}
  disabled={loading}
  aria-label="fetch datasets"
.error {
  color: #e53935;
  background-color: #ffebee;
  padding: 8px;
  border-radius: 4px;
  margin: 8px 0;
}
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>
"cloudflare": {
"label": "Vite React Template",
"products": [
"Workers"
],
"categories": [],
"icon_urls": [
"https://imagedelivery.net/wSMYJvS3Xw-n339CbDyDIA/5ca0ca32-e897-4699-d4c1-6b680512f000/public"
],
"preview_image_url": "https://imagedelivery.net/wSMYJvS3Xw-n339CbDyDIA/fc7b4b62-442b-4769-641b-ad4422d74300/public",
"dash": true
},
},
"node_modules/@drizzle-team/brocli": {
"version": "0.10.2",
"resolved": "https://registry.npmjs.org/@drizzle-team/brocli/-/brocli-0.10.2.tgz",
"integrity": "sha512-z33Il7l5dKjUgGULTqBsQBQwckHh5AbIuxhdsIxDDiZAzBOrZO6q9ogcWC65kU382AfynTfgNumVcNIjuIua6w==",
"dev": true,
"license": "Apache-2.0"
}
},
"node_modules/@esbuild-kit/core-utils": {
"version": "3.3.2",
"resolved": "https://registry.npmjs.org/@esbuild-kit/core-utils/-/core-utils-3.3.2.tgz",
"integrity": "sha512-sPRAnw9CdSsRmEtnsl2WXWdyquogVpB3yZ3dgwJfe8zrOzTsV7cJvmwrKVa+0ma5BoiGJ+BoqkMvawbayKUsqQ==",
"deprecated": "Merged into tsx: https://tsx.is",
"dev": true,
"license": "MIT",
"dependencies": {
"esbuild": "~0.18.20",
"source-map-support": "^0.5.21"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/android-arm": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.18.20.tgz",
"integrity": "sha512-fyi7TDI/ijKKNZTUJAQqiG5T7YjJXgnzkURqmGj13C6dCqckZBLdl4h7bkhHt/t0WP+zO9/zwroDvANaOqO5Sw==",
"cpu": [
"arm"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/android-arm64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.18.20.tgz",
"integrity": "sha512-Nz4rJcchGDtENV0eMKUNa6L12zz2zBDXuhj/Vjh18zGqB44Bi7MBMSXjgunJgjRhCmKOjnPuZp4Mb6OKqtMHLQ==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/android-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.18.20.tgz",
"integrity": "sha512-8GDdlePJA8D6zlZYJV/jnrRAi6rOiNaCC/JclcXpB+KIuvfBN4owLtgzY2bsxnx666XjJx2kDPUmnTtR8qKQUg==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/darwin-arm64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.18.20.tgz",
"integrity": "sha512-bxRHW5kHU38zS2lPTPOyuyTm+S+eobPUnTNkdJEfAddYgEcll4xkT8DB9d2008DtTbl7uJag2HuE5NZAZgnNEA==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/darwin-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.18.20.tgz",
"integrity": "sha512-pc5gxlMDxzm513qPGbCbDukOdsGtKhfxD1zJKXjCCcU7ju50O7MeAZ8c4krSJcOIJGFR+qx21yMMVYwiQvyTyQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/freebsd-arm64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.18.20.tgz",
"integrity": "sha512-yqDQHy4QHevpMAaxhhIwYPMv1NECwOvIpGCZkECn8w2WFHXjEwrBn3CeNIYsibZ/iZEUemj++M26W3cNR5h+Tw==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/freebsd-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.18.20.tgz",
"integrity": "sha512-tgWRPPuQsd3RmBZwarGVHZQvtzfEBOreNuxEMKFcd5DaDn2PbBxfwLcj4+aenoh7ctXcbXmOQIn8HI6mCSw5MQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=12"
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-arm": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.18.20.tgz",
"integrity": "sha512-/5bHkMWnq1EgKr1V+Ybz3s1hWXok7mDFUMQ4cG10AfW3wL02PSZi5kFpYKrptDsgb2WAJIvRcDm+qIvXf/apvg==",
"cpu": [
"arm"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-arm64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.18.20.tgz",
"integrity": "sha512-2YbscF+UL7SQAVIpnWvYwM+3LskyDmPhe31pE7/aoTMFKKzIc9lLbyGUpmmb8a8AixOL61sQ/mFh3jEjHYFvdA==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-ia32": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.18.20.tgz",
"integrity": "sha512-P4etWwq6IsReT0E1KHU40bOnzMHoH73aXp96Fs8TIT6z9Hu8G6+0SHSw9i2isWrD2nbx2qo5yUqACgdfVGx7TA==",
"cpu": [
"ia32"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-loong64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.18.20.tgz",
"integrity": "sha512-nXW8nqBTrOpDLPgPY9uV+/1DjxoQ7DoB2N8eocyq8I9XuqJ7BiAMDMf9n1xZM9TgW0J8zrquIb/A7s3BJv7rjg==",
"cpu": [
"loong64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-mips64el": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.18.20.tgz",
"integrity": "sha512-d5NeaXZcHp8PzYy5VnXV3VSd2D328Zb+9dEq5HE6bw6+N86JVPExrA6O68OPwobntbNJ0pzCpUFZTo3w0GyetQ==",
"cpu": [
"mips64el"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-ppc64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.18.20.tgz",
"integrity": "sha512-WHPyeScRNcmANnLQkq6AfyXRFr5D6N2sKgkFo2FqguP44Nw2eyDlbTdZwd9GYk98DZG9QItIiTlFLHJHjxP3FA==",
"cpu": [
"ppc64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-riscv64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.18.20.tgz",
"integrity": "sha512-WSxo6h5ecI5XH34KC7w5veNnKkju3zBRLEQNY7mv5mtBmrP/MjNBCAlsM2u5hDBlS3NGcTQpoBvRzqBcRtpq1A==",
"cpu": [
"riscv64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-s390x": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.18.20.tgz",
"integrity": "sha512-+8231GMs3mAEth6Ja1iK0a1sQ3ohfcpzpRLH8uuc5/KVDFneH6jtAJLFGafpzpMRO6DzJ6AvXKze9LfFMrIHVQ==",
"cpu": [
"s390x"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.18.20.tgz",
"integrity": "sha512-UYqiqemphJcNsFEskc73jQ7B9jgwjWrSayxawS6UVFZGWrAAtkzjxSqnoclCXxWtfwLdzU+vTpcNYhpn43uP1w==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/netbsd-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.18.20.tgz",
"integrity": "sha512-iO1c++VP6xUBUmltHZoMtCUdPlnPGdBom6IrO4gyKPFFVBKioIImVooR5I83nTew5UOYrk3gIJhbZh8X44y06A==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/openbsd-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.18.20.tgz",
"integrity": "sha512-e5e4YSsuQfX4cxcygw/UCPIEP6wbIL+se3sxPdCiMbFLBWu0eiZOJ7WoD+ptCLrmjZBK1Wk7I6D/I3NglUGOxg==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/sunos-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.18.20.tgz",
"integrity": "sha512-kDbFRFp0YpTQVVrqUd5FTYmWo45zGaXe0X8E1G/LKFC0v8x0vWrhOWSLITcCn63lmZIxfOMXtCfti/RxN/0wnQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"sunos"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/win32-arm64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.18.20.tgz",
"integrity": "sha512-ddYFR6ItYgoaq4v4JmQQaAI5s7npztfV4Ag6NrhiaW0RrnOXqBkgwZLofVTlq1daVTQNhtI5oieTvkRPfZrePg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/win32-ia32": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.18.20.tgz",
"integrity": "sha512-Wv7QBi3ID/rROT08SABTS7eV4hX26sVduqDOTe1MvGMjNd3EjOz4b7zeexIR62GTIEKrfJXKL9LFxTYgkyeu7g==",
"cpu": [
"ia32"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/win32-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.18.20.tgz",
"integrity": "sha512-kTdfRcSiDfQca/y9QIkng02avJ+NCaQvrMejlsB3RRv5sE9rRoeBPISaZpKxHELzRxZyLvNts1P27W3wV+8geQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild-kit/core-utils/node_modules/esbuild": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.18.20.tgz",
"integrity": "sha512-ceqxoedUrcayh7Y7ZX6NdbbDzGROiyVBgC4PriJThBKSVPWnnFHZAkfI1lJT8QFkOwH4qOS2SJkS4wvpGl8BpA==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"bin": {
"esbuild": "bin/esbuild"
},
"engines": {
"node": ">=12"
},
"optionalDependencies": {
"@esbuild/android-arm": "0.18.20",
"@esbuild/android-arm64": "0.18.20",
"@esbuild/android-x64": "0.18.20",
"@esbuild/darwin-arm64": "0.18.20",
"@esbuild/darwin-x64": "0.18.20",
"@esbuild/freebsd-arm64": "0.18.20",
"@esbuild/freebsd-x64": "0.18.20",
"@esbuild/linux-arm": "0.18.20",
"@esbuild/linux-arm64": "0.18.20",
"@esbuild/linux-ia32": "0.18.20",
"@esbuild/linux-loong64": "0.18.20",
"@esbuild/linux-mips64el": "0.18.20",
"@esbuild/linux-ppc64": "0.18.20",
"@esbuild/linux-riscv64": "0.18.20",
"@esbuild/linux-s390x": "0.18.20",
"@esbuild/linux-x64": "0.18.20",
"@esbuild/netbsd-x64": "0.18.20",
"@esbuild/openbsd-x64": "0.18.20",
"@esbuild/sunos-x64": "0.18.20",
"@esbuild/win32-arm64": "0.18.20",
"@esbuild/win32-ia32": "0.18.20",
"@esbuild/win32-x64": "0.18.20"
}
},
"node_modules/@esbuild-kit/esm-loader": {
"version": "2.6.5",
"resolved": "https://registry.npmjs.org/@esbuild-kit/esm-loader/-/esm-loader-2.6.5.tgz",
"integrity": "sha512-FxEMIkJKnodyA1OaCUoEvbYRkoZlLZ4d/eXFu9Fh8CbBBgP5EmZxrfTRyN0qpXZ4vOvqnE5YdRdcrmUUXuU+dA==",
"deprecated": "Merged into tsx: https://tsx.is",
"dev": true,
"license": "MIT",
"dependencies": {
"@esbuild-kit/core-utils": "^3.3.2",
"get-tsconfig": "^4.7.0"
}
},
"node_modules/@neondatabase/serverless": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/@neondatabase/serverless/-/serverless-1.0.0.tgz",
"integrity": "sha512-XWmEeWpBXIoksZSDN74kftfTnXFEGZ3iX8jbANWBc+ag6dsiQuvuR4LgB0WdCOKMb5AQgjqgufc0TgAsZubUYw==",
"license": "MIT",
"dependencies": {
"@types/node": "^22.10.2",
"@types/pg": "^8.8.0"
},
"engines": {
"node": ">=19.0.0"
}
},
},
"node_modules/@types/node": {
"version": "22.14.1",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.14.1.tgz",
"integrity": "sha512-u0HuPQwe/dHrItgHHpmw3N2fYCR6x4ivMNbPHRkBVP4CvN+kiRrKHWk3i8tXiO/joPwXLMYvF9TTF0eqgHIuOw==",
"license": "MIT",
"dependencies": {
"undici-types": "~6.21.0"
}
},
"node_modules/@types/pg": {
"version": "8.11.13",
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.11.13.tgz",
"integrity": "sha512-6kXByGkvRvwXLuyaWzsebs2du6+XuAB2CuMsuzP7uaihQahshVgSmB22Pmh0vQMkQ1h5+PZU0d+Di1o+WpVWJg==",
"license": "MIT",
"dependencies": {
"@types/node": "*",
"pg-protocol": "*",
"pg-types": "^4.0.1"
}
}
},
"node_modules/dotenv": {
"version": "16.5.0",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.5.0.tgz",
"integrity": "sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==",
"dev": true,
"license": "BSD-2-Clause",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://dotenvx.com"
}
},
"node_modules/drizzle-kit": {
"version": "0.31.0",
"resolved": "https://registry.npmjs.org/drizzle-kit/-/drizzle-kit-0.31.0.tgz",
"integrity": "sha512-pcKVT+GbfPA+bUovPIilgVOoq+onNBo/YQBG86sf3/GFHkN6lRJPm1l7dKN0IMAk57RQoIm4GUllRrasLlcaSg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@drizzle-team/brocli": "^0.10.2",
"@esbuild-kit/esm-loader": "^2.5.5",
"esbuild": "^0.25.2",
"esbuild-register": "^3.5.0"
},
"bin": {
"drizzle-kit": "bin.cjs"
}
},
"node_modules/drizzle-orm": {
"version": "0.42.0",
"resolved": "https://registry.npmjs.org/drizzle-orm/-/drizzle-orm-0.42.0.tgz",
"integrity": "sha512-pS8nNJm2kBNZwrOjTHJfdKkaU+KuUQmV/vk5D57NojDq4FG+0uAYGMulXtYT///HfgsMF0hnFFvu1ezI3OwOkg==",
"license": "Apache-2.0",
"peerDependencies": {
"@aws-sdk/client-rds-data": ">=3",
"@cloudflare/workers-types": ">=4",
"@electric-sql/pglite": ">=0.2.0",
"@libsql/client": ">=0.10.0",
"@libsql/client-wasm": ">=0.10.0",
"@neondatabase/serverless": ">=0.10.0",
"@op-engineering/op-sqlite": ">=2",
"@opentelemetry/api": "^1.4.1",
"@planetscale/database": ">=1.13",
"@prisma/client": "*",
"@tidbcloud/serverless": "*",
"@types/better-sqlite3": "*",
"@types/pg": "*",
"@types/sql.js": "*",
"@vercel/postgres": ">=0.8.0",
"@xata.io/client": "*",
"better-sqlite3": ">=7",
"bun-types": "*",
"expo-sqlite": ">=14.0.0",
"gel": ">=2",
"knex": "*",
"kysely": "*",
"mysql2": ">=2",
"pg": ">=8",
"postgres": ">=3",
"sql.js": ">=1",
"sqlite3": ">=5"
},
"peerDependenciesMeta": {
"@aws-sdk/client-rds-data": {
"optional": true
},
"@cloudflare/workers-types": {
"optional": true
},
"@electric-sql/pglite": {
"optional": true
},
"@libsql/client": {
"optional": true
},
"@libsql/client-wasm": {
"optional": true
},
"@neondatabase/serverless": {
"optional": true
},
"@op-engineering/op-sqlite": {
"optional": true
},
"@opentelemetry/api": {
"optional": true
},
"@planetscale/database": {
"optional": true
},
"@prisma/client": {
"optional": true
},
"@tidbcloud/serverless": {
"optional": true
},
"@types/better-sqlite3": {
"optional": true
},
"@types/pg": {
"optional": true
},
"@types/sql.js": {
"optional": true
},
"@vercel/postgres": {
"optional": true
},
"@xata.io/client": {
"optional": true
},
"better-sqlite3": {
"optional": true
},
"bun-types": {
"optional": true
},
"expo-sqlite": {
"optional": true
},
"gel": {
"optional": true
},
"knex": {
"optional": true
},
"kysely": {
"optional": true
},
"mysql2": {
"optional": true
},
"pg": {
"optional": true
},
"postgres": {
"optional": true
},
"prisma": {
"optional": true
},
"sql.js": {
"optional": true
},
"sqlite3": {
"optional": true
}
}
},
"node_modules/esbuild-register": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/esbuild-register/-/esbuild-register-3.6.0.tgz",
"integrity": "sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==",
"dev": true,
"license": "MIT",
"dependencies": {
"debug": "^4.3.4"
},
"peerDependencies": {
"esbuild": ">=0.12 <1"
}
},
"node_modules/get-tsconfig": {
"version": "4.10.0",
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.0.tgz",
"integrity": "sha512-kGzZ3LWWQcGIAmg6iWvXn0ei6WDtV26wzHRMwDSzmAbcXrTEXxHy6IehI6/4eT6VRKyMP1eF1VqwrVUmE/LR7A==",
"dev": true,
"license": "MIT",
"dependencies": {
"resolve-pkg-maps": "^1.0.0"
},
"funding": {
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
}
},
"node_modules/pg-int8": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz",
"integrity": "sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==",
"license": "ISC",
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/pg-numeric": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/pg-numeric/-/pg-numeric-1.0.2.tgz",
"integrity": "sha512-BM/Thnrw5jm2kKLE5uJkXqqExRUY/toLHda65XgFTBTFYZyopbKjBe29Ii3RbkvlsMoFwD+tHeGaCjjv0gHlyw==",
"license": "ISC",
"engines": {
"node": ">=4"
}
},
"node_modules/pg-protocol": {
"version": "1.8.0",
"resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.8.0.tgz",
"integrity": "sha512-jvuYlEkL03NRvOoyoRktBK7+qU5kOvlAwvmrH8sr3wbLrOdVWsRxQfz8mMy9sZFsqJ1hEWNfdWKI4SAmoL+j7g==",
"license": "MIT"
},
"node_modules/pg-types": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/pg-types/-/pg-types-4.0.2.tgz",
"integrity": "sha512-cRL3JpS3lKMGsKaWndugWQoLOCoP+Cic8oseVcbr0qhPzYD5DWXK+RZ9LY9wxRf7RQia4SCwQlXk0q6FCPrVng==",
"license": "MIT",
"dependencies": {
"pg-int8": "1.0.1",
"pg-numeric": "1.0.2",
"postgres-array": "~3.0.1",
"postgres-bytea": "~3.0.0",
"postgres-date": "~2.1.0",
"postgres-interval": "^3.0.0",
"postgres-range": "^1.1.1"
},
"engines": {
"node": ">=10"
}
},
"node_modules/postgres": {
"version": "3.4.5",
"resolved": "https://registry.npmjs.org/postgres/-/postgres-3.4.5.tgz",
"integrity": "sha512-cDWgoah1Gez9rN3H4165peY9qfpEo+SA61oQv65O3cRUE1pOEoJWwddwcqKE8XZYjbblOJlYDlLV4h67HrEVDg==",
"devOptional": true,
"license": "Unlicense",
"engines": {
"node": ">=12"
},
"funding": {
"type": "individual",
"url": "https://github.com/sponsors/porsager"
}
},
"node_modules/postgres-array": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-3.0.4.tgz",
"integrity": "sha512-nAUSGfSDGOaOAEGwqsRY27GPOea7CNipJPOA7lPbdEpx5Kg3qzdP0AaWC5MlhTWV9s4hFX39nomVZ+C4tnGOJQ==",
"license": "MIT",
"engines": {
"node": ">=12"
}
},
"node_modules/postgres-bytea": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-3.0.0.tgz",
"integrity": "sha512-CNd4jim9RFPkObHSjVHlVrxoVQXz7quwNFpz7RY1okNNme49+sVyiTvTRobiLV548Hx/hb1BG+iE7h9493WzFw==",
"license": "MIT",
"dependencies": {
"obuf": "~1.1.2"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/postgres-date": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-2.1.0.tgz",
"integrity": "sha512-K7Juri8gtgXVcDfZttFKVmhglp7epKb1K4pgrkLxehjqkrgPhfG6OO8LHLkfaqkbpjNRnra018XwAr1yQFWGcA==",
"license": "MIT",
"engines": {
"node": ">=12"
}
},
"node_modules/postgres-interval": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/postgres-interval/-/postgres-interval-3.0.0.tgz",
"integrity": "sha512-BSNDnbyZCXSxgA+1f5UU2GmwhoI0aU5yMxRGO8CdFEcY2BQF9xm/7MqKnYoM1nJDk8nONNWDk9WeSmePFhQdlw==",
"license": "MIT",
"engines": {
"node": ">=12"
}
},
"node_modules/postgres-range": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/postgres-range/-/postgres-range-1.1.4.tgz",
"integrity": "sha512-i/hbxIE9803Alj/6ytL7UHQxRvZkI9O4Sy+J3HGc4F4oo/2eQAjTSNJ0bfxyse3bH0nuVesCk+3IRLaMtG3H6w==",
"license": "MIT"
},
}
},
"node_modules/resolve-pkg-maps": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
"node_modules/source-map-support": {
"version": "0.5.21",
"resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz",
"integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==",
"dev": true,
"license": "MIT",
"dependencies": {
"buffer-from": "^1.0.0",
"source-map": "^0.6.0"
}
},
"node_modules/tsx": {
"version": "4.19.3",
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.3.tgz",
"integrity": "sha512-4H8vUNGNjQ4V2EOoGw005+c+dGuPSnhpPBPHBtsZdGZBk/iJb4kguGlPWaZTZ3q5nMtFOEsY0nRDlh9PJyd6SQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"esbuild": "~0.25.0",
"get-tsconfig": "^4.7.5"
},
"bin": {
"tsx": "dist/cli.mjs"
},
"engines": {
"node": ">=18.0.0"
},
"optionalDependencies": {
"fsevents": "~2.3.3"
}
},
# Getting data into db
## pomona and 247 files
## pomona labels, avianz, 2023, 2025
round-trip the eBird taxonomy through DuckDB to get it to import into Postgres
CREATE TABLE imported_data AS
SELECT * FROM read_csv_auto('eBird_taxonomy_v2024.tsv',
delim='\t',
header=true,
auto_detect=true);
COPY imported_data TO 'eBird_taxonomy_v2024.tsv' (
FORMAT 'csv',
DELIMITER '\t',
HEADER true,
QUOTE '"', -- Use double quotes for quoting
ESCAPE '\', -- Use backslash as escape character
NULL '' -- How NULL values should appear
);
\COPY ebird_taxonomy FROM 'eBird_taxonomy_v2024.tsv' WITH (FORMAT csv, DELIMITER E'\t', HEADER, QUOTE '"', ESCAPE '\', NULL '');
CREATE SEQUENCE twenty_four_seven_sequence START 1;
CREATE TABLE twenty_four_seven_files_xxh64(
id INTEGER DEFAULT nextval('twenty_four_seven_sequence'),
drive VARCHAR,
location VARCHAR,
trip_date DATE,
file VARCHAR,
latitude DOUBLE,
longitude DOUBLE,
start_rec_period TIMESTAMP WITH TIME ZONE,
end_rec_period TIMESTAMP WITH TIME ZONE,
duration_s DOUBLE,
sample_rate INTEGER,
utc_date_time TIMESTAMP WITH TIME ZONE,
local_date_time TIMESTAMP WITH TIME ZONE,
moth_id VARCHAR,
gain VARCHAR,
battery_v DOUBLE,
temp_c DOUBLE,
sha2_256 VARCHAR,
night BOOLEAN,
xxh64 VARCHAR
);
COPY twenty_four_seven_files_xxh64 (
drive, location, trip_date, file, latitude, longitude,
start_rec_period, end_rec_period, duration_s, sample_rate,
utc_date_time, local_date_time, moth_id, gain, battery_v,
temp_c, sha2_256, night, xxh64
) FROM '/Volumes/SSD1/DB_Files_with_xxh/twenty_four_seven_files_20250228_with_xxh64.csv';
COPY (
SELECT
location,
list(DISTINCT latitude) AS latitude_list,
list(DISTINCT longitude) AS longitude_list
FROM
twenty_four_seven_files_xxh64
GROUP BY
location
) TO 'location_twenty_four_seven.csv' (HEADER, DELIMITER '\t');
### now julia
## location table
df=CSV.read("location_twenty_four_seven.csv", DataFrame)
# do stuff ### made a mistake here, mixed up dataset_id and location_id
### i have dataset_id in field file.location_id
### see fix below
CSV.write("location.tsv", df,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
## clusters
COPY (
SELECT location, trip_date
FROM pomona_files_xxh64
GROUP BY location, trip_date
) TO 'cluster_pomona.csv' (HEADER, DELIMITER '\t');
# edit in easycsv, add columns, headers
location=CSV.read("location.tsv", DataFrame)
cluster=CSV.read("cluster_pomona.csv", DataFrame)
#do stuff to populate cluster
c=select(cluster, Not(:location_id))
l = select(location, :id => :location_id, :dataset_id, :name)
n = leftjoin(c, l, on = [:dataset_id, :name])
select!(n, :id,:dataset_id,:location_id,:name,:description,:created_by,:created_at,:last_modified,:modified_by,:active,:timezone_id)
#merge name and description(trip_date)
x=[]
for row in eachrow(n)
    y = row.name * " " * string(row.description)
    push!(x, y)
end
n.name=x
n.description .= missing
CSV.write("cluster.tsv", n,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = true # Whether to append to an existing file
)
look at 2022-03-23 cluster later, mutate to subsequent cluster, only N14 serviced
# note needed to add timezone_id to duckdb, yet to add to neon
ALTER TABLE cluster
ADD COLUMN timezone_id VARCHAR(40); -- wanted NOT NULL, but adding that constraint fails in duckdb
UPDATE cluster
SET timezone_id = 'Pacific/Auckland'; -- empty anyway, already changed tsv.
can't easily drop it and recreate because cyclic_recording_pattern depends on it
## cyclic_recording_pattern
work back from the file table, todo (rough sketch below)
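one possible way to work back from the files, only a sketch: group the file rows by cluster and the record/sleep settings that get set further down in these notes; the output column names are guesses, not the real cyclic_recording_pattern schema
# sketch only: pomona_files7.tsv is produced further down; cluster_id, record_s,
# sleep_s and file_count are assumed column names, not the actual schema
using CSV, DataFrames
pdf = CSV.read("pomona_files7.tsv", DataFrame)
pattern = combine(groupby(pdf, [:cluster_id, :record_s, :sleep_s]), nrow => :file_count)
CSV.write("cyclic_recording_pattern_draft.tsv", pattern, delim = '\t')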
## files
# copy pomona files into duckdb, copy 2500kHz & 24/7 files into duckdb
COPY pomona_files_xxh64 TO 'pomona_files.csv' (HEADER, DELIMITER '\t');
COPY twenty_four_seven_files_xxh64 TO 'twenty_four_seven_files.csv' (HEADER, DELIMITER '\t');
added the new columns needed in easycsv (a DataFrames-only alternative is sketched below)
later, loaded both into df
pdf=CSV.read("pomona_files.csv", DataFrame)
tdf=CSV.read("twenty_four_seven_files.csv", DataFrame)
pdf.location_id = ["Yx0oNUDmP5ch" for x in 1:length(pdf.location_id)]
tdf.location_id = ["gljgxDbfasva" for x in 1:length(tdf.location_id)]
pdf.description = pdf.location .* " " .* string.(pdf.trip_date)
tdf.description = tdf.location .* " " .* string.(tdf.trip_date)
pdf.storage_prefix = [nanoid(3) for x in 1:length(pdf.storage_prefix)]
tdf.storage_prefix = [nanoid(3) for x in 1:length(tdf.storage_prefix)]
pdf.upload = [true for x in 1:length(pdf.upload)]
tdf.upload = [true for x in 1:length(tdf.upload)]
pdf.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.created_by)]
tdf.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(tdf.created_by)]
pdf.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.modified_by)]
tdf.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(tdf.modified_by)]
w=now(tz"UTC")
pdf.created_at = [w for x in 1:length(pdf.created_at)]
tdf.created_at = [w for x in 1:length(tdf.created_at)]
pdf.last_modified = [w for x in 1:length(pdf.last_modified)]
tdf.last_modified = [w for x in 1:length(tdf.last_modified)]
pdf.active = [true for x in 1:length(pdf.active)]
tdf.active = [true for x in 1:length(tdf.active)]
pdf.dataset_id = ["Yx0oNUDmP5ch" for x in 1:length(pdf.dataset_id)]
tdf.dataset_id = ["gljgxDbfasva" for x in 1:length(tdf.dataset_id)]
pdf.record_s = [895 for x in 1:length(pdf.record_s)]
tdf.record_s = [60 for x in 1:length(tdf.record_s)]
rename!(pdf, :Column28 => :sleep_s)
rename!(tdf, :Column28 => :sleep_s)
pdf.sleep_s = [5 for x in 1:length(pdf.sleep_s)] # not needed
# tdf.sleep_s not needed, left unset
CSV.write("pomona_files2.tsv", pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
CSV.write("twenty_four_seven_files2.tsv", tdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
rename!(pdf, :timestamp_utc => :timestamp)
rename!(tdf, :timestamp_utc => :timestamp)
CSV.write("pomona_files3.tsv", pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
CSV.write("twenty_four_seven_files3.tsv", tdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
#check time format in moth metadata and my code
using Dates, TimeZones, JSON3
function is_solar_night(timestamp::String, sunrise::String, sunset::String)::Bool
    ts = ZonedDateTime(timestamp)
    sr = ZonedDateTime(sunrise)
    ss = ZonedDateTime(sunset)
    # Handle cases where sunrise is after sunset (e.g., high latitudes): night is then between sunset and sunrise
    if sr > ss
        return ts > ss && ts < sr
    else
        return ts > ss || ts < sr
    end
end
function is_civil_night(timestamp::String, dawn::String, dusk::String)::Bool
    ts = ZonedDateTime(timestamp)
    dn = ZonedDateTime(dawn)
    ds = ZonedDateTime(dusk)
    # Handle cases where dawn is after dusk (e.g., high latitudes): night is then between dusk and dawn
    if dn > ds
        return ts > ds && ts < dn
    else
        return ts > ds || ts < dn
    end
end
# pomona
s=[]
c=[]
m=[]
for i in 1:nrow(pdf)
    t = ZonedDateTime(String(pdf[i, :timestamp]), "yyyy-mm-dd HH:MM:SSzzzz") |>
        x -> astimezone(x, tz"UTC") |>
        x -> x + Second(pdf[i, :duration] |> x -> trunc(x)) ÷ 2 |> # Second conversion requires Int
        x -> Dates.format(x, "yyyy-mm-ddTHH:MM:SSzzzz")
    # suncalc-cli 2025-03-22T21:02:34.540+00:00 -45.50603 167.48374
    cmd = `suncalc-cli $t $(pdf[i, :latitude]) $(pdf[i, :longitude])`
    json = read(cmd, String) |> x -> JSON3.read(x)
    solar = is_solar_night(t, json.sunTimes.sunrise, json.sunTimes.sunset)
    civil = is_civil_night(t, json.sunTimes.dawn, json.sunTimes.dusk)
    moon = json.moon.phase |> x -> round(x, digits=2)
    push!(s, solar)
    push!(c, civil)
    push!(m, moon)
    println(i, " ", pdf[i, :night], " ", solar, civil, moon)
end
pdf.maybe_solar_night = s
pdf.maybe_civil_night = c
pdf.moon_phase = m
CSV.write("pomona_files4.tsv", pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# twenty_four_seven
s=[]
c=[]
m=[]
for i in 1:nrow(tdf)
    t = ZonedDateTime(String(tdf[i, :timestamp]), "yyyy-mm-dd HH:MM:SSzzzz") |>
        x -> astimezone(x, tz"UTC") |>
        x -> x + Second(tdf[i, :duration] |> x -> trunc(x)) ÷ 2 |>
        x -> Dates.format(x, "yyyy-mm-ddTHH:MM:SSzzzz")
    # suncalc-cli 2025-03-22T21:02:34.540+00:00 -45.50603 167.48374
    cmd = `suncalc-cli $t $(tdf[i, :latitude]) $(tdf[i, :longitude])`
    json = read(cmd, String) |> x -> JSON3.read(x)
    solar = is_solar_night(t, json.sunTimes.sunrise, json.sunTimes.sunset)
    civil = is_civil_night(t, json.sunTimes.dawn, json.sunTimes.dusk)
    moon = json.moon.phase |> x -> round(x, digits=2)
    push!(s, solar)
    push!(c, civil)
    push!(m, moon)
    println(i, " ", tdf[i, :night], " ", solar, civil, moon)
end
tdf.maybe_solar_night = s
tdf.maybe_civil_night = c
tdf.moon_phase = m
CSV.write("twenty_four_seven_files4.tsv", tdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
pdf.id = [nanoid() for x in 1:length(pdf.id)]
tdf.id = [nanoid() for x in 1:length(tdf.id)]
# for aws s3
# nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")
pdf.storage_prefix = [(nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890") * "/" * nanoid(; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")) for x in 1:length(pdf.storage_prefix)]
tdf.storage_prefix = [(nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890") * "/" * nanoid(; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")) for x in 1:length(tdf.storage_prefix)]
CSV.write("pomona_files5.tsv", pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
filter(x -> x.description != "MC2 2025-02-01", tdf) # NB filter (not filter!) returns a copy, tdf itself is unchanged
CSV.write("twenty_four_seven_files5.tsv", tdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
####tidy up files, deleted MC2 ######################################
# cluster_id
# join on p/t file.description , cluster.name to get cluster_id
cdf=CSV.read("cluster.tsv", DataFrame)
select!(cdf, :id=> "cluster_id", :name => "description", :dataset_id )
select!(pdf, Not([:cluster_id]))
select!(tdf, Not([:cluster_id]))
j_pdf = leftjoin(pdf, cdf, on = [:description, :dataset_id])
j_tdf = leftjoin(tdf, cdf, on = [:description, :dataset_id])
CSV.write("pomona_files6.tsv", j_pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
CSV.write("twenty_four_seven_files6.tsv", j_tdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# filter out my junk/mistakes: 7 files with 0 duration, 50 with non-integer duration, see file_deleted.csv
filter!(x -> x.duration > 0, j_pdf) #deleted 7 with 0 duration after checking pomona_files in duckdb, also 1 GR6 unconfirmed
filter!(x -> (x.duration - trunc(x.duration)) == 0, j_pdf) #deleted 50 with duration not integer, see file_deleted.csv
filter!(x -> x.duration > 0, j_tdf) #deleted 7 with 0 duration after checking pomona_files in duckdb, also 1 GR6 unconfirmed
filter!(x -> (x.duration - trunc(x.duration)) == 0, j_tdf) #deleted 50 with duration not integer, see file_deleted.csv
# should have fixed timestamp_local here, see fix below instead
CSV.write("pomona_files7.tsv", j_pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
CSV.write("twenty_four_seven_files7.tsv", j_tdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# file table
file_a=select(j_pdf, :id, :file_name, :path, :xxh64 => "xxh64_hash", :location_id, :timestamp_local, :cluster_id, :duration, :sample_rate, :description, :storage_prefix, :upload, :maybe_solar_night, :maybe_civil_night, :moon_phase, :created_by, :created_at, :last_modified, :modified_by, :active)
file_b=select(j_tdf, :id, :file_name, :path, :xxh64 => "xxh64_hash", :location_id, :timestamp_local, :cluster_id, :duration, :sample_rate, :description, :storage_prefix, :upload, :maybe_solar_night, :maybe_civil_night, :moon_phase, :created_by, :created_at, :last_modified, :modified_by, :active)
file=vcat(file_a, file_b)
CSV.write("file.tsv", file,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
skraak_db=> \copy file FROM 'file.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
ERROR: insert or update on table "file" violates foreign key constraint "file_location_id_fkey"
DETAIL: Key (location_id)=(Yx0oNUDmP5ch) is not present in table "location"
# maybe I need to check timestamp_local and file_name before adding data below
### i have dataset_id in field file.location_id
### fix here
### join location table and put location_id in correct field
fdf=CSV.read("file.tsv", DataFrame)
rename!(fdf, :location_id => "dataset_id")
fdf.name= fdf.description |> x -> split.(x, " ") |> x -> first.(x)
ldf=CSV.read("location.tsv", DataFrame)
select!(ldf, :id => "location_id", :dataset_id, :name)
# join
file=leftjoin(fdf, ldf, on = [:dataset_id, :name])
select!(file, :id, :file_name, :path, :xxh64_hash, :location_id, :timestamp_local, :cluster_id, :duration, :sample_rate, :description, :storage_prefix, :upload, :maybe_solar_night, :maybe_civil_night, :moon_phase, :created_by, :created_at, :last_modified, :modified_by, :active)
CSV.write("file2.tsv", file,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# make sure timestamp_local is accurate, should have done earlier
# already have a file df loaded as file
md=CSV.read("moth_metadata.tsv", DataFrame)
select!(md, :file_id => "id", :timestamp)
jdf = leftjoin(file, md, on = :id) # generic join was removed from DataFrames.jl; leftjoin keeps every file row
x = jdf.timestamp |>
x -> String.(x) |>
x -> ZonedDateTime.(x, "yyyy-mm-dd HH:MM:SSzzzz") |>
x -> astimezone.(x, tz"Pacific/Auckland")
jdf.timestamp_local = x
select!(jdf, Not([:timestamp]))
names(jdf) == names(file)
CSV.write("file3.tsv", jdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
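the note further up wondered about checking file_name against the timestamps before loading; a possible spot check, assuming AudioMoth-style names like 20230418_210200.WAV written in UTC (both assumptions)
# spot check (sketch): compare the date/time embedded in the file name with the
# recorder timestamp in moth_metadata; assumes "yyyymmdd_HHMMSS.WAV" names in UTC
using CSV, DataFrames, Dates, TimeZones
fdf = CSV.read("file3.tsv", DataFrame)
md = CSV.read("moth_metadata.tsv", DataFrame)
chk = leftjoin(select(fdf, :id => "file_id", :file_name), select(md, :file_id, :timestamp), on = :file_id)
bad = filter(chk) do row
    ismissing(row.timestamp) && return true
    m = match(r"(\d{8}_\d{6})", row.file_name)
    m === nothing && return true
    named = DateTime(m.captures[1], dateformat"yyyymmdd_HHMMSS")
    utc = astimezone(ZonedDateTime(String(row.timestamp), "yyyy-mm-dd HH:MM:SSzzzz"), tz"UTC")
    abs(Dates.value(named - DateTime(utc))) > 60_000 # more than a minute apart
end
nrow(bad) # expect 0 if the naming assumption holds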
# moth_metadata table
file_a=select(j_pdf, :id => "file_id", :timestamp, :recorder_id, :gain, :battery_v, :temp_c, :created_at, :created_by, :last_modified, :modified_by)
file_b=select(j_tdf, :id => "file_id", :timestamp, :recorder_id, :gain, :battery_v, :temp_c, :created_at, :created_by, :last_modified, :modified_by)
file=vcat(file_a, file_b)
CSV.write("moth_metadata.tsv", file,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# file_dataset table
file_a=select(j_pdf, :id => "file_id", :dataset_id, :created_at, :created_by, :last_modified, :modified_by)
file_b=select(j_tdf, :id => "file_id", :dataset_id, :created_at, :created_by, :last_modified, :modified_by)
file=vcat(file_a, file_b)
CSV.write("file_dataset.tsv", file,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# species and call_type tables constructed in easycsv as they are very small
# selections
selection needs:
:id, :file_id, :dataset_id, :start_time, :end_time, :freq_low, :freq_high, :description, :storage_prefix, :upload, :approved, :is_solar_night, :is_civil_night, :moon_phase, :created_by, :created_at, :last_modified, :modified_by, :active
id, file_id, dataset_id, start_time, end_time, freq_low, freq_high, description, storage_prefix, upload, approved, is_solar_night, is_civil_night, moon_phase, created_by, created_at, last_modified, modified_by, active
selection metadata needs:
:selection_id, :json, :created_at, :created_by, :last_modified, :modified_by
label needs:
:id, :selection_id, :species_id, :filter, :certainty, :created_at, :created_by, :last_modified, :modified_by, :active
label_subtype needs:
:id, :label_id, :calltype_id, :filter, :certainty, :created_at, :created_by, :last_modified, :modified_by, :active
# pomona_labels_avianz
COPY pomona_labels_avianz TO 'pomona_labels_avianz.tsv' (
FORMAT 'csv',
DELIMITER '\t',
HEADER true -- leave all the other stuff out or it mis-formats noise_type
);
add columns 1 by 1
adf=CSV.read("pomona_labels_avianz.tsv", DataFrame)
# NB: need dataset_id for join on file
adf.dataset_id=["Yx0oNUDmP5ch" for x in 1:nrow(adf)] #all pomona
adf.id=[nanoid() for x in 1:length(adf.file)]
file_id, join on file table
file=CSV.read("file3.tsv", DataFrame)
select!(file, :id, :file_name, :description)
file.location = split.(file.description, " ") |> x -> first.(x)
select!(file, Not([:description]))
rename!(file, :id => :file_id, :file_name => :file)
file_dataset=CSV.read("file_dataset.tsv", DataFrame)
select!(file_dataset, :file_id, :dataset_id)
file=leftjoin(file, file_dataset, on = :file_id)
NB: join on all 3 below, do not leave out dataset_id; there are duplicates across datasets
adf=leftjoin(adf, file, on = [:location, :file, :dataset_id])
select!(adf, :location,:file,:box,:k_set,:duet,:k_close,:k_ok,:k_far,:k_m,:k_f,:k_mf,:k_umf,:other_label,:noise_level,:noise_type,:id,:file_id,:dataset_id)
findall(ismissing, adf.file_id) #8374-8396 #leftjoin failed due to missings, saved them to a csv
deleteat!(adf, 8374:8396)
saved to csv
adf.box = map(x -> eval(Meta.parse(x)), adf.box)
adf.start_time = getindex.(adf.box, 1) |> x -> ceil.(x, digits=3)
adf.end_time = getindex.(adf.box, 2) |> x -> floor.(x, digits=3)
adf.freq_low = getindex.(adf.box, 3) |> x -> ceil.(x, digits=3)
adf.freq_high = getindex.(adf.box, 4) |> x -> floor.(x, digits=3)
saved to csv
x=[]
for row in eachrow(adf)
    y = row.location * " " * row.file
    push!(x, y)
end
adf.description = x
saved to csv
adf.storage_prefix = [(nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890") * "/" * nanoid(; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")) for x in 1:length(adf.file)]
saved to csv
adf.upload = [true for x in 1:length(adf.file)]
adf.approved = [true for x in 1:length(adf.file)]
saved to csv
need to join timestamp from file table, have file_id so can join on that
file_time=CSV.read("moth_metadata.tsv", DataFrame)
select!(file_time, :file_id, :timestamp)
adf=leftjoin(adf, file_time, on = :file_id)
findall(ismissing, adf.timestamp) #Int64[]
need latitude and longitude from file - location
file_location=CSV.read("file3.tsv", DataFrame)
select!(file_location, :id => "file_id", :location_id)
location=CSV.read("location.tsv", DataFrame)
select!(location, :id => "location_id", :latitude, :longitude)
ldf=leftjoin(file_location, location, on = :location_id)
select!(ldf, Not([:location_id]))
adf=leftjoin(adf, ldf, on = :file_id)
saved to csv
using Dates, TimeZones, JSON3
functions is_solar_night and is_civil_night from above
s=[]
c=[]
m=[]
for i in 1:nrow(adf)
    t = ZonedDateTime(String(adf[i, :timestamp]), "yyyy-mm-dd HH:MM:SSzzzz") |>
        x -> astimezone(x, tz"UTC") |>
        x -> Dates.format(x, "yyyy-mm-ddTHH:MM:SSzzzz")
    # suncalc-cli 2025-03-22T21:02:34.540+00:00 -45.50603 167.48374
    cmd = `suncalc-cli $t $(adf[i, :latitude]) $(adf[i, :longitude])`
    json = read(cmd, String) |> x -> JSON3.read(x)
    solar = is_solar_night(t, json.sunTimes.sunrise, json.sunTimes.sunset)
    civil = is_civil_night(t, json.sunTimes.dawn, json.sunTimes.dusk)
    moon = json.moon.phase |> x -> round(x, digits=2)
    push!(s, solar)
    push!(c, civil)
    push!(m, moon)
    println(i, " ", solar, civil, moon)
end
adf.is_solar_night = s
adf.is_civil_night = c
adf.moon_phase = m
saved to csv
adf.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(adf.file)]
adf.created_at=["2025-03-31T13:14:50.083+00:00" for x in 1:length(adf.file)]
adf.last_modified=["2025-03-31T13:14:50.083+00:00" for x in 1:length(adf.file)]
adf.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(adf.file)]
adf.active=[true for x in 1:length(adf.file)]
saved to csv
selection=select(adf, :id, :file_id, :dataset_id, :start_time, :end_time, :freq_low, :freq_high, :description, :storage_prefix, :upload, :approved, :is_solar_night, :is_civil_night, :moon_phase, :created_by, :created_at, :last_modified, :modified_by, :active)
CSV.write("selection_avianz.tsv", selection,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
adf.selection_metadata = ["" for x in 1:nrow(adf)]
adf.selection_metadata[adf.k_close .== true] .= """{"distance":"close"}"""
adf.selection_metadata[adf.k_ok .== true] .= """{"distance":"ok"}"""
adf.selection_metadata[adf.k_far .== true] .= """{"distance":"far"}"""
selection_metadata= filter(row -> !ismissing(row.selection_metadata), adf) |>
x -> filter(row -> row.selection_metadata != "", x) |>
x -> select(x, :id => "selection_id", :selection_metadata => "json", :created_at, :created_by, :last_modified, :modified_by)
CSV.write("selection_metadata_avianz.tsv", selection_metadata,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
using JSON3
adf.file_metadata = ["" for x in 1:nrow(adf)]
for row in eachrow(adf)
    dict = Dict()
    !ismissing(row.noise_level) && (dict["noiseLevel"] = row.noise_level)
    !ismissing(row.noise_type) && (dict["noiseType"] = row.noise_type)
    json_string = JSON3.write(dict)
    row.file_metadata = json_string
    #println(json_string)
end
saved adf to csv
CSV.write("pomona_labels_avianz.tsv", adf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
file_metadata= filter(row -> !ismissing(row.file_metadata), adf) |>
x -> filter(row -> row.file_metadata != "", x) |>
x -> select(x, :file_id, :file_metadata => "json", :created_at, :created_by, :last_modified, :modified_by) |>
x -> groupby(x, :file_id) |>
x -> combine(x, first)
CSV.write("file_metadata_avianz.tsv", file_metadata,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
need:
label: :species_id, :filter, :certainty
label_subtype: :calltype_id, :filter, :certainty
ignore k_set, amalgamate m+mf+umf, amalgamate f+mf, take duet
l_arr=[]
lst_arr=[]
for row in eachrow(adf)
l_id = nanoid()
if row.k_set #not using k_mf or k_umf
push!(l_arr, (id=l_id, selection_id=row.id, species_id="ANhV8iZPfIh8")) #Haast Kiwi
row.k_m && push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id = "dD9cLYtq_G5a")) #M
row.k_f && push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id = "DJ4cFnVHc7NM")) #F
if row.duet #so it has the correct label_id
push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id="dD9cLYtq_G5a")) #D
end
end
#it's either k_set or 'other_label', never both at the same time
occursin("Morepork", coalesce(row.other_label, "")) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="OA9X68Mrv4v8"))
occursin("Kaka (Sth Is)", coalesce(row.other_label, "")) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="MMOLwZw9fVSn"))
occursin("Don't Know", coalesce(row.other_label, "")) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="e-bBi7tasfya"))
occursin("Cuckoo (Long-tailed)", coalesce(row.other_label, "")) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="rqIcB4EkSy2G"))
end
label=DataFrame(l_arr)
label.filter_id=["6C-d1OtNF3GJ" for x in 1:nrow(label)]
label.certainty=[100 for x in 1:nrow(label)]
label.created_at=["2025-04-01T07:04:07.167+00:00" for x in 1:nrow(label)]
label.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]
label.last_modified=["2025-04-01T07:04:07.167+00:00" for x in 1:nrow(label)]
label.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]
label.active=[true for x in 1:nrow(label)]
CSV.write("label_avianz.tsv", label,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
label_subtype=DataFrame(lst_arr)
label_subtype.filter_id=["E721dNTaHio7" for x in 1:nrow(label_subtype)]
label_subtype.certainty=[100 for x in 1:nrow(label_subtype)]
label_subtype.created_at=["2025-04-01T07:04:07.167+00:00" for x in 1:nrow(label_subtype)]
label_subtype.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]
label_subtype.last_modified=["2025-04-01T07:04:07.167+00:00" for x in 1:nrow(label_subtype)]
label_subtype.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]
label_subtype.active=[true for x in 1:nrow(label_subtype)]
CSV.write("label_subtype_avianz.tsv", label_subtype,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# species_dataset table
# just started with the species table and edited it in easycsv (a Julia sketch of the same step follows)
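A hedged Julia sketch of what that could look like instead of hand editing (the species.tsv file name and the constant values are assumptions; columns follow the species_dataset schema below):
using DataFrames, CSV
species=CSV.read("species.tsv", DataFrame) # hypothetical export of the species table
species_dataset=select(species, :id => "species_id")
species_dataset.dataset_id=["Yx0oNUDmP5ch" for x in 1:nrow(species_dataset)] # all pomona
species_dataset.created_at=["2025-04-01T07:04:07.167+00:00" for x in 1:nrow(species_dataset)]
species_dataset.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(species_dataset)]
species_dataset.last_modified=["2025-04-01T07:04:07.167+00:00" for x in 1:nrow(species_dataset)]
species_dataset.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(species_dataset)]
CSV.write("species_dataset.tsv", species_dataset, delim = '\t')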
# pomona_labels_20230418
COPY pomona_labels_20230418 TO 'pomona_labels_20230418.tsv' (
FORMAT 'csv',
DELIMITER '\t',
HEADER true,
QUOTE '"', -- Use double quotes for quoting
ESCAPE '\', -- Use backslash as escape character
NULL '' -- How NULL values should appear
);
selection needs:
:id, :file_id, :dataset_id, :start_time, :end_time, :freq_low, :freq_high, :description, :storage_prefix, :upload, :approved, :is_solar_night, :is_civil_night, :moon_phase, :created_by, :created_at, :last_modified, :modified_by, :active
id, file_id, dataset_id, start_time, end_time, freq_low, freq_high, description, storage_prefix, upload, approved, is_solar_night, is_civil_night, moon_phase, created_by, created_at, last_modified, modified_by, active
label needs:
:id, :selection_id, :species_id, :filter, :certainty, :created_at, :created_by, :last_modified, :modified_by, :active
label_subtype needs:
:id, :label_id, :calltype_id, :filter, :certainty, :created_at, :created_by, :last_modified, :modified_by, :active
using DataFrames, CSV, Dates, TimeZones, Nanoid
pdf=CSV.read("pomona_labels_20230418.tsv", DataFrame)
pdf.dataset_id=["Yx0oNUDmP5ch" for x in 1:nrow(pdf)] #all pomona
pdf.id=[nanoid() for x in 1:length(pdf.file)]
#file_id, join on file table
file=CSV.read("file3.tsv", DataFrame)
select!(file, :id, :file_name, :description)
file.location = split.(file.description, " ") |> x -> first.(x)
select!(file, Not([:description]))
rename!(file, :id => :file_id, :file_name => :file)
file_dataset=CSV.read("file_dataset.tsv", DataFrame)
select!(file_dataset, :file_id, :dataset_id)
file=leftjoin(file, file_dataset, on = :file_id)
pdf=leftjoin(pdf, file, on = [:location, :file, :dataset_id])
select!(pdf, :location,:file,:box,:male,:female,:duet,:not_kiwi,:other_label,:close_call,:ok_call,:far_call,:low_noise,:medium_noise,:high_noise,:terrible_noise,:id,:file_id,:dataset_id)
findall(ismissing, pdf.file_id) #69688:69691
# 69688 │ C05 20211124_033000.WAV
# 69689 │ M02 20211124_214500.WAV
# 69690 │ C05 20230215_030000.WAV
# 69691 │ C05 20230215_030000.WAV
deleteat!(pdf,69688:69691)
CSV.write("pomona_labels_20230418.tsv", pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
pdf.box = map(x -> eval(Meta.parse(x)), pdf.box)
pdf.start_time = getindex.(pdf.box, 1) |> x -> ceil.(x, digits=3)
pdf.end_time = getindex.(pdf.box, 2) |> x -> floor.(x, digits=3)
pdf.freq_low = [missing for x in 1:nrow(pdf)]
pdf.freq_high = [missing for x in 1:nrow(pdf)]
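For context, each box cell is the string form of an AviaNZ-style bounds array; a hedged illustration of the parse-and-round step on one made-up value:
box_str = "[123.4567, 130.9876, 0.0, 7000.0]" # hypothetical, real values come from the labels table
box = eval(Meta.parse(box_str)) # 4-element Vector{Float64}
ceil(box[1], digits=3) # 123.457, start_time rounds inward
floor(box[2], digits=3) # 130.987, end_time rounds inward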
x=[]
for row in eachrow(pdf)
y=row.location * " " * row.file
push!(x, y)
end
pdf.description = x
pdf.storage_prefix = [(nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890") * "/" * nanoid(; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")) for x in 1:length(pdf.file)]
pdf.upload = [true for x in 1:length(pdf.file)]
pdf.approved = [true for x in 1:length(pdf.file)]
# need to join timestamp from file table, have file_id so can join on that
file_time=CSV.read("moth_metadata.tsv", DataFrame)
select!(file_time, :file_id, :timestamp)
pdf=leftjoin(pdf, file_time, on = :file_id)
findall(ismissing, pdf.timestamp) #Int64[]
# need latitude and longitude from file - location
file_location=CSV.read("file3.tsv", DataFrame)
select!(file_location, :id => "file_id", :location_id)
location=CSV.read("location.tsv", DataFrame)
select!(location, :id => "location_id", :latitude, :longitude)
ldf=leftjoin(file_location, location, on = :location_id)
select!(ldf, Not([:location_id]))
pdf=leftjoin(pdf, ldf, on = :file_id)
using Dates, TimeZones, JSON3
# uses the is_solar_night and is_civil_night functions defined earlier in these notes
s=[]
c=[]
m=[]
for i in 1:nrow(pdf)
t = ZonedDateTime(String(pdf[i, :timestamp]), "yyyy-mm-dd HH:MM:SSzzzz") |>
x -> astimezone(x, tz"UTC") |>
x -> Dates.format(x, "yyyy-mm-ddTHH:MM:SSzzzz")
# suncalc-cli 2025-03-22T21:02:34.540+00:00 -45.50603 167.48374
cmd = `suncalc-cli $t $(pdf[i, :latitude]) $(pdf[i, :longitude])`
json = read(cmd, String) |>
x -> JSON3.read(x)
solar = is_solar_night(t, json.sunTimes.sunrise, json.sunTimes.sunset)
civil = is_civil_night(t, json.sunTimes.dawn, json.sunTimes.dusk)
moon = json.moon.phase |> x -> round(x, digits=2)
push!(s, solar)
push!(c, civil)
push!(m, moon)
println(i, " ", solar, " ", civil, " ", moon)
end
pdf.is_solar_night = s
pdf.is_civil_night = c
pdf.moon_phase = m
#saved to csv
CSV.write("pomona_labels_20230418.tsv", pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
pdf.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.file)]
pdf.created_at=["2025-04-02T20:53:47.171+00:00" for x in 1:length(pdf.file)]
pdf.last_modified=["2025-03-31T13:14:50.083+00:00" for x in 1:length(pdf.file)]
pdf.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.file)]
pdf.active=[true for x in 1:length(pdf.file)]
# saved pdf to csv
selection=select(pdf, :id, :file_id, :dataset_id, :start_time, :end_time, :freq_low, :freq_high, :description, :storage_prefix, :upload, :approved, :is_solar_night, :is_civil_night, :moon_phase, :created_by, :created_at, :last_modified, :modified_by, :active)
CSV.write("selection_pomona_20230418.tsv", selection,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
l_arr=[]
lst_arr=[]
s_meta=[]
for row in eachrow(pdf)
l_id = nanoid()
if true in [row.male, row.female, row.duet]
push!(l_arr, (id=l_id, selection_id=row.id, species_id="ANhV8iZPfIh8")) #Haast Kiwi
row.male && push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id = "dD9cLYtq_G5a")) #M
row.female && push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id = "DJ4cFnVHc7NM")) #F
row.duet && push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id="eDZD2ET_Dghm")) #D
# do selection metadata separately
end
x=coalesce(row.other_label, "")
# Q / Question goes to selection metadata; Baby Kiwi goes there as well
occursin("Kiwi", x) && begin
push!(l_arr, (id=l_id, selection_id=row.id, species_id="ANhV8iZPfIh8")) #Haast Kiwi
push!(s_meta, (selection_id=row.id, json="""{"note":"Baby Kiwi maybe?"}"""))
end
occursin("Q", x) && push!(s_meta, (selection_id=row.id, json="""{"note":"Question"}"""))
if row.not_kiwi
isempty(x) && begin
push!(l_arr, (id=l_id, selection_id=row.id, species_id="e-bBi7tasfya")) # Don't Know
push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id="GiUuQbRfmG30")) # Not
end
occursin("Morepork", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="OA9X68Mrv4v8"))
occursin("Kaka", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="MMOLwZw9fVSn"))
occursin("Don't Know", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="e-bBi7tasfya"))
occursin("Cuckoo (Long-tailed)", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="rqIcB4EkSy2G"))
occursin("G", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="YPFfCCNvDuuK"))
occursin("Kea", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="CWYXOm6bKs4V"))
end
end
label=DataFrame(l_arr)
label.filter_id=["uWoU4s46qSPd" for x in 1:nrow(label)]
label.certainty=[100 for x in 1:nrow(label)]
label.created_at=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(label)]
label.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]
label.last_modified=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(label)]
label.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]
label.active=[true for x in 1:nrow(label)]
CSV.write("label_pomona_20230418.tsv", label,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
label_subtype=DataFrame(lst_arr)
label_subtype.filter_id=["ZIm0-Cs_FOmP" for x in 1:nrow(label_subtype)]
label_subtype.certainty=[100 for x in 1:nrow(label_subtype)]
label_subtype.created_at=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(label_subtype)]
label_subtype.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]
label_subtype.last_modified=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(label_subtype)]
label_subtype.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]
label_subtype.active=[true for x in 1:nrow(label_subtype)]
CSV.write("label_subtype_pomona_20230418.tsv", label_subtype,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# s_meta
selection_meta=DataFrame(s_meta)
selection_meta.created_at=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(selection_meta)]
selection_meta.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(selection_meta)]
selection_meta.last_modified=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(selection_meta)]
selection_meta.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(selection_meta)]
CSV.write("selection_metadata_pomona_20230418.tsv", selection_meta,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
CSV.write("pomona_labels_20230418.tsv", pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# pomona_labels_20250304
COPY pomona_labels_20250304 TO 'pomona_labels_20250304.tsv' (
FORMAT 'csv',
DELIMITER '\t',
HEADER true,
QUOTE '"', -- Use double quotes for quoting
ESCAPE '\', -- Use backslash as escape character
NULL '' -- How NULL values should appear
);
pdf=CSV.read("pomona_labels_20250304.tsv", DataFrame)
pdf.dataset_id=["Yx0oNUDmP5ch" for x in 1:nrow(pdf)] #all pomona
pdf.id=[nanoid() for x in 1:length(pdf.file)]
#file_id, join on file table
file=CSV.read("file3.tsv", DataFrame)
select!(file, :id, :file_name, :description)
file.location = split.(file.description, " ") |> x -> first.(x)
select!(file, Not([:description]))
rename!(file, :id => :file_id, :file_name => :file)
file_dataset=CSV.read("file_dataset.tsv", DataFrame)
select!(file_dataset, :file_id, :dataset_id)
file=leftjoin(file, file_dataset, on = :file_id)
pdf=leftjoin(pdf, file, on = [:location, :file, :dataset_id])
select!(pdf, :location,:file,:box,:male,:female,:duet,:not_kiwi,:other_label,:id,:file_id,:dataset_id)
findall(ismissing, pdf.file_id) #83330:83333
# 83330, 83331, 83332, 83333
deleteat!(pdf,83330:83333)
CSV.write("pomona_labels_20250304.tsv", pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
pdf.box = map(x -> eval(Meta.parse(x)), pdf.box)
pdf.start_time = getindex.(pdf.box, 1) |> x -> ceil.(x, digits=3)
pdf.end_time = getindex.(pdf.box, 2) |> x -> floor.(x, digits=3)
pdf.freq_low = [missing for x in 1:nrow(pdf)]
pdf.freq_high = [missing for x in 1:nrow(pdf)]
x=[]
for row in eachrow(pdf)
y=row.location * " " * row.file
push!(x, y)
end
pdf.description = x
pdf.storage_prefix = [(nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890") * "/" * nanoid(; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")) for x in 1:length(pdf.file)]
pdf.upload = [true for x in 1:length(pdf.file)]
pdf.approved = [true for x in 1:length(pdf.file)]
# need to join timestamp from file table, have file_id so can join on that
file_time=CSV.read("moth_metadata.tsv", DataFrame)
select!(file_time, :file_id, :timestamp)
pdf=leftjoin(pdf, file_time, on = :file_id)
findall(ismissing, pdf.timestamp) #Int64[]
# need latitude and longitude from file - location
file_location=CSV.read("file3.tsv", DataFrame)
select!(file_location, :id => "file_id", :location_id)
location=CSV.read("location.tsv", DataFrame)
select!(location, :id => "location_id", :latitude, :longitude)
ldf=leftjoin(file_location, location, on = :location_id)
select!(ldf, Not([:location_id]))
pdf=leftjoin(pdf, ldf, on = :file_id)
using Dates, TimeZones, JSON3
# uses the is_solar_night and is_civil_night functions defined earlier in these notes
s=[]
c=[]
m=[]
for i in 1:nrow(pdf)
t = ZonedDateTime(String(pdf[i, :timestamp]), "yyyy-mm-dd HH:MM:SSzzzz") |>
x -> astimezone(x, tz"UTC") |>
x -> Dates.format(x, "yyyy-mm-ddTHH:MM:SSzzzz")
# suncalc-cli 2025-03-22T21:02:34.540+00:00 -45.50603 167.48374
cmd = `suncalc-cli $t $(pdf[i, :latitude]) $(pdf[i, :longitude])`
json = read(cmd, String) |>
x -> JSON3.read(x)
solar = is_solar_night(t, json.sunTimes.sunrise, json.sunTimes.sunset)
civil = is_civil_night(t, json.sunTimes.dawn, json.sunTimes.dusk)
moon = json.moon.phase |> x -> round(x, digits=2)
push!(s, solar)
push!(c, civil)
push!(m, moon)
println(i, " ", solar, " ", civil, " ", moon)
end
pdf.is_solar_night = s
pdf.is_civil_night = c
pdf.moon_phase = m
#saved to csv
CSV.write("pomona_labels_20250304.tsv", pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
pdf.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.file)]
pdf.created_at=["2025-04-04T17:57:30.649+00:00" for x in 1:length(pdf.file)]
pdf.last_modified=["2025-04-04T17:57:30.649+00:00" for x in 1:length(pdf.file)]
pdf.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.file)]
pdf.active=[true for x in 1:length(pdf.file)]
# saved pdf to csv
selection=select(pdf, :id, :file_id, :dataset_id, :start_time, :end_time, :freq_low, :freq_high, :description, :storage_prefix, :upload, :approved, :is_solar_night, :is_civil_night, :moon_phase, :created_by, :created_at, :last_modified, :modified_by, :active)
CSV.write("selection_pomona_20250304.tsv", selection,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
l_arr=[]
lst_arr=[]
s_meta=[]
for row in eachrow(pdf)
l_id = nanoid()
if true in [row.male, row.female, row.duet]
push!(l_arr, (id=l_id, selection_id=row.id, species_id="ANhV8iZPfIh8")) #Haast Kiwi
row.male && push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id = "dD9cLYtq_G5a")) #M
row.female && push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id = "DJ4cFnVHc7NM")) #F
row.duet && push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id="eDZD2ET_Dghm")) #D
# do selection metadata separately
end
x=coalesce(row.other_label, "")
# Q / Question goes to selection metadata; Baby Kiwi goes there as well
occursin("Kiwi", x) && begin # Maybe Baby Kiwi
push!(l_arr, (id=l_id, selection_id=row.id, species_id="ANhV8iZPfIh8")) #Haast Kiwi
push!(s_meta, (selection_id=row.id, json="""{"note":"Baby Kiwi maybe?"}"""))
end
occursin("Q", x) && push!(s_meta, (selection_id=row.id, json="""{"note":"Question"}"""))
if row.not_kiwi
isempty(x) && begin
push!(l_arr, (id=l_id, selection_id=row.id, species_id="e-bBi7tasfya")) # Don't Know
push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id="GiUuQbRfmG30")) # Not
end
occursin("Morepork", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="OA9X68Mrv4v8"))
occursin("Kaka", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="MMOLwZw9fVSn"))
occursin("Don't Know", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="e-bBi7tasfya"))
occursin("Cuckoo (Long-tailed)", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="rqIcB4EkSy2G"))
occursin("G", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="YPFfCCNvDuuK")) #Gecko
occursin("Kea", x) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="CWYXOm6bKs4V"))
end
end
label=DataFrame(l_arr)
label.filter_id=["uWoU4s46qSPd" for x in 1:nrow(label)]
label.certainty=[100 for x in 1:nrow(label)]
label.created_at=["2025-04-04T18:08:30.111+00:00" for x in 1:nrow(label)]
label.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]
label.last_modified=["2025-04-04T18:08:30.111+00:00" for x in 1:nrow(label)]
label.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]
label.active=[true for x in 1:nrow(label)]
CSV.write("label_pomona_20250304.tsv", label,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
label_subtype=DataFrame(lst_arr)
label_subtype.filter_id=["ZIm0-Cs_FOmP" for x in 1:nrow(label_subtype)]
label_subtype.certainty=[100 for x in 1:nrow(label_subtype)]
label_subtype.created_at=["2025-04-04T18:08:30.111+00:00" for x in 1:nrow(label_subtype)]
label_subtype.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]
label_subtype.last_modified=["2025-04-04T18:08:30.111+00:00" for x in 1:nrow(label_subtype)]
label_subtype.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]
label_subtype.active=[true for x in 1:nrow(label_subtype)]
CSV.write("label_subtype_pomona_20250304.tsv", label_subtype,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# s_meta
selection_meta=DataFrame(s_meta)
selection_meta.created_at=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(selection_meta)]
selection_meta.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(selection_meta)]
selection_meta.last_modified=["2025-04-04T18:08:30.111+00:00" for x in 1:nrow(selection_meta)]
selection_meta.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(selection_meta)]
CSV.write("selection_metadata_pomona_20250304.tsv", selection_meta,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
CSV.write("pomona_labels_20250304.tsv", pdf,
delim = '\t', # Tab delimiter
header = true, # Include header (default)
quotechar = '"', # Character used for quoting
missingstring = "", # How missing values are written
append = false # Whether to append to an existing file
)
# make copy of postgres to duckdb
duckdb skraak.duckdb
INSTALL postgres; -- only run first time
LOAD postgres;
-- Attach the PostgreSQL database using the given connection string
ATTACH 'host=localhost port=5432 dbname=mydb' AS test (TYPE postgres);
-- The table "tbl_name" can now be queried as if it is a regular table
SELECT * FROM test.tbl_name;
-- Switch the active database to "test"
USE test;
-- List all tables in the file
SHOW TABLES;
-- copy whole pg db to duckdb (the attached pg db is 'test', the local file db is 'skraak')
COPY FROM DATABASE test TO skraak;
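To sanity check the copy from Julia, something along these lines should work (hedged sketch, assuming the DuckDB.jl package):
using DuckDB, DataFrames
con = DBInterface.connect(DuckDB.DB, "skraak.duckdb")
DBInterface.execute(con, "SHOW TABLES") |> DataFrame
DBInterface.execute(con, "SELECT count(*) FROM selection") |> DataFrame
DBInterface.close!(con)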
// Drizzle handles materialized views differently, so the materialized view `ebird_taxonomy_v2024` is excluded from this conversion; implement it at the query level in application code.
// CHECK constraints not implemented in drizzle (a Julia range-check sketch follows this schema)
// location table
// CONSTRAINT latitude_range CHECK (latitude BETWEEN -90.0 AND 90.0)
// CONSTRAINT longitude_range CHECK (longitude BETWEEN -180.0 AND 180.0)
// GIN Indexes for JSONB not implemented in drizzle
// INDEX idx_file_metadata_json_gin ON file_metadata USING gin(json)
import { pgEnum, pgTable, varchar, boolean, timestamp, decimal, integer, json, primaryKey, index, unique, foreignKey, uniqueIndex, bigint } from "drizzle-orm/pg-core";
import { sql } from "drizzle-orm";
// Enum types
export const datasetTypeEnum = pgEnum("dataset_type", ["organise", "test", "train"]);
export const gainLevelEnum = pgEnum("gain_level", ["low", "low-medium", "medium", "medium-high", "high"]);
export const roleNameEnum = pgEnum("role_name", ["ADMIN", "USER", "CURATOR"]);
export const permissionNameEnum = pgEnum("permission_name", ["READ", "UPLOAD", "DOWNLOAD", "EDIT", "DELETE"]);
// Dataset Table
export const dataset = pgTable("dataset", {
id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)
name: varchar("name", { length: 255 }).notNull(),
public: boolean("public").default(false), // Whether dataset is publicly accessible
description: varchar("description", { length: 255 }),
createdBy: varchar("created_by", { length: 64 }).notNull(), // kinde user id
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(), // kinde user id
owner: varchar("owner", { length: 64 }).notNull(), // kinde user id
active: boolean("active").default(true),
type: datasetTypeEnum("type").notNull().default("organise"),
}, (table) => ({
nameIdx: index("idx_dataset_name").on(table.name),
ownerIdx: index("idx_dataset_owner").on(table.owner),
activeIdx: index("idx_dataset_active").on(table.active),
publicIdx: index("idx_dataset_public").on(table.public),
}));
// Locations Table
export const location = pgTable("location", {
id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)
datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
name: varchar("name", { length: 140 }).notNull(),
latitude: decimal("latitude", { precision: 10, scale: 7 }), // -45.5027
longitude: decimal("longitude", { precision: 10, scale: 7 }), // 167.48406
description: varchar("description", { length: 255 }),
createdBy: varchar("created_by", { length: 64 }).notNull(),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
active: boolean("active").default(true),
}, (table) => ({
nameIdx: index("idx_location_name").on(table.name),
datasetIdx: index("idx_location_dataset").on(table.datasetId),
activeIdx: index("idx_location_active").on(table.active),
datasetActiveIdx: index("idx_location_dataset_active").on(table.datasetId, table.active),
}));
// Cyclic Recording Pattern Table
export const cyclicRecordingPattern = pgTable("cyclic_recording_pattern", {
id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)
recordS: integer("record_s").notNull(),
sleepS: integer("sleep_s").notNull(),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
});
// Cluster Table
export const cluster = pgTable("cluster", {
id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)
datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
locationId: varchar("location_id", { length: 12 }).notNull().references(() => location.id),
name: varchar("name", { length: 140 }).notNull(),
description: varchar("description", { length: 255 }),
createdBy: varchar("created_by", { length: 64 }).notNull(),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
active: boolean("active").default(true),
timezoneId: varchar("timezone_id", { length: 40 }),
cyclicRecordingPatternId: varchar("cyclic_recording_pattern_id", { length: 12 }).references(() => cyclicRecordingPattern.id),
}, (table) => ({
datasetIdx: index("idx_cluster_dataset").on(table.datasetId),
activeIdx: index("idx_cluster_active").on(table.active),
datasetActiveIdx: index("idx_cluster_dataset_active").on(table.datasetId, table.active),
recordingPatternIdx: index("idx_cluster_recording_pattern").on(table.cyclicRecordingPatternId),
}));
// Files Table
export const file = pgTable("file", {
id: varchar("id", { length: 21 }).primaryKey(), // nanoid
fileName: varchar("file_name", { length: 255 }).notNull(),
path: varchar("path", { length: 255 }),
xxh64Hash: varchar("xxh64_hash", { length: 16 }).notNull(),
locationId: varchar("location_id", { length: 12 }).notNull().references(() => location.id),
timestampLocal: timestamp("timestamp_local", { withTimezone: true }).notNull(),
clusterId: varchar("cluster_id", { length: 12 }).references(() => cluster.id),
duration: decimal("duration", { precision: 7, scale: 3 }).notNull(),
sampleRate: integer("sample_rate").notNull(),
description: varchar("description", { length: 255 }),
storagePrefix: varchar("storage_prefix", { length: 34 }).notNull(),
upload: boolean("upload").default(true),
maybeSolarNight: boolean("maybe_solar_night"),
maybeCivilNight: boolean("maybe_civil_night"),
moonPhase: decimal("moon_phase", { precision: 3, scale: 2 }),
createdBy: varchar("created_by", { length: 64 }).notNull(),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
active: boolean("active").default(true),
}, (table) => ({
locationIdx: index("idx_file_location").on(table.locationId),
activeIdx: index("idx_file_active").on(table.active),
timestampLocalIdx: index("idx_file_timestamp_local").on(table.timestampLocal),
clusterIdx: index("idx_file_cluster").on(table.clusterId),
maybeSolarNightIdx: index("idx_file_maybe_solar_night").on(table.maybeSolarNight),
maybeCivilNightIdx: index("idx_file_maybe_civil_night").on(table.maybeCivilNight),
}));
// Moth Metadata Table
export const mothMetadata = pgTable("moth_metadata", {
fileId: varchar("file_id", { length: 21 }).primaryKey().references(() => file.id),
timestamp: timestamp("timestamp", { withTimezone: true }).notNull(),
recorderId: varchar("recorder_id", { length: 16 }),
gain: gainLevelEnum("gain"),
batteryV: decimal("battery_v", { precision: 2, scale: 1 }),
tempC: decimal("temp_c", { precision: 3, scale: 1 }),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
});
// File Metadata Table
export const fileMetadata = pgTable("file_metadata", {
fileId: varchar("file_id", { length: 21 }).primaryKey().references(() => file.id),
json: json("json"),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
}, (table) => ({
jsonIdx: index("idx_file_metadata_json").on(table.json),
}));
// Junction Table for Files to Dataset
export const fileDataset = pgTable("file_dataset", {
fileId: varchar("file_id", { length: 21 }).notNull().references(() => file.id),
datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
}, (table) => ({
pk: primaryKey({columns: [table.fileId, table.datasetId]}),
fileIdx: index("idx_file_dataset_file").on(table.fileId),
datasetIdx: index("idx_file_dataset_dataset").on(table.datasetId),
}));
// Selection Table
export const selection = pgTable("selection", {
id: varchar("id", { length: 21 }).primaryKey(), // nanoid
fileId: varchar("file_id", { length: 21 }).notNull().references(() => file.id),
datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
startTime: decimal("start_time", { precision: 7, scale: 3 }).notNull(),
endTime: decimal("end_time", { precision: 7, scale: 3 }).notNull(),
freqLow: decimal("freq_low", { precision: 9, scale: 3 }),
freqHigh: decimal("freq_high", { precision: 9, scale: 3 }),
description: varchar("description", { length: 255 }),
storagePrefix: varchar("storage_prefix", { length: 34 }).notNull(),
upload: boolean("upload").default(true),
approved: boolean("approved").default(false),
isSolarNight: boolean("is_solar_night"),
isCivilNight: boolean("is_civil_night"),
moonPhase: decimal("moon_phase", { precision: 3, scale: 2 }),
createdBy: varchar("created_by", { length: 64 }).notNull(),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
active: boolean("active").default(true),
}, (table) => ({
fileIdx: index("idx_selection_file").on(table.fileId),
datasetIdx: index("idx_selection_dataset").on(table.datasetId),
activeIdx: index("idx_selection_active").on(table.active),
approvedIdx: index("idx_selection_approved").on(table.approved),
isSolarNightIdx: index("idx_selection_is_solar_night").on(table.isSolarNight),
isCivilNightIdx: index("idx_selection_is_civil_night").on(table.isCivilNight),
moonPhaseIdx: index("idx_selection_moon_phase").on(table.moonPhase),
// Adding the foreign key constraint to fileDataset
fkFileDataset: foreignKey({
columns: [table.fileId, table.datasetId],
foreignColumns: [fileDataset.fileId, fileDataset.datasetId],
name: "fk_selection_file_dataset"
}),
}));
// Selection Metadata Table
export const selectionMetadata = pgTable("selection_metadata", {
selectionId: varchar("selection_id", { length: 21 }).primaryKey().references(() => selection.id),
json: json("json"),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
}, (table) => ({
jsonIdx: index("idx_selection_metadata_json").on(table.json),
}));
// eBird Taxonomy Table
export const ebirdTaxonomy = pgTable("ebird_taxonomy", {
id: varchar("id", { length: 12 }).primaryKey(),
taxonomyVersion: varchar("taxonomy_version", { length: 4 }).notNull(),
taxonOrder: integer("taxon_order").notNull(),
category: varchar("category", { length: 15 }).notNull(),
speciesCode: varchar("species_code", { length: 15 }).notNull(),
taxonConceptId: varchar("taxon_concept_id", { length: 15 }),
primaryComName: varchar("primary_com_name", { length: 100 }).notNull(),
sciName: varchar("sci_name", { length: 100 }).notNull(),
birdOrder: varchar("bird_order", { length: 30 }),
family: varchar("family", { length: 100 }),
speciesGroup: varchar("species_group", { length: 100 }),
reportAs: varchar("report_as", { length: 15 }),
validFrom: timestamp("valid_from").notNull(),
validTo: timestamp("valid_to"),
}, (table) => ({
speciesCodeVersionUnique: unique().on(table.speciesCode, table.taxonomyVersion),
}));
// Species Table
export const species = pgTable("species", {
id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)
label: varchar("label", { length: 100 }).notNull(),
ebirdCode: varchar("ebird_code", { length: 12 }),
taxonomyVersion: varchar("taxonomy_version", { length: 4 }),
description: varchar("description", { length: 255 }),
createdBy: varchar("created_by", { length: 64 }).notNull(),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
active: boolean("active").default(true),
owner: varchar("owner", { length: 64 }).notNull(),
}, (table) => ({
labelIdx: index("idx_species_label").on(table.label),
ebirdIdx: index("idx_species_ebird").on(table.ebirdCode),
// Foreign key reference to ebird_taxonomy
ebirdTaxonomyRef: foreignKey({
columns: [table.ebirdCode, table.taxonomyVersion],
foreignColumns: [ebirdTaxonomy.speciesCode, ebirdTaxonomy.taxonomyVersion],
}),
}));
// Call Types Table
export const callType = pgTable("call_type", {
id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)
speciesId: varchar("species_id", { length: 12 }).notNull().references(() => species.id),
label: varchar("label", { length: 100 }).notNull(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
active: boolean("active").default(true),
}, (table) => ({
speciesIdx: index("idx_call_type_species").on(table.speciesId),
labelIdx: index("idx_call_type_label").on(table.label),
}));
// Filter Table
export const filter = pgTable("filter", {
id: varchar("id", { length: 12 }).primaryKey(), // nanoid
name: varchar("name", { length: 140 }).notNull(),
description: varchar("description", { length: 255 }),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
owner: varchar("owner", { length: 64 }).notNull(),
active: boolean("active").default(true),
});
// Label Table
export const label = pgTable("label", {
id: varchar("id", { length: 21 }).primaryKey(), // nanoid
selectionId: varchar("selection_id", { length: 21 }).notNull().references(() => selection.id),
speciesId: varchar("species_id", { length: 12 }).notNull().references(() => species.id),
filterId: varchar("filter_id", { length: 12 }).references(() => filter.id),
certainty: decimal("certainty", { precision: 5, scale: 2 }),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
active: boolean("active").default(true),
}, (table) => ({
selectionIdIdx: index("idx_label_selection_id").on(table.selectionId),
speciesIdIdx: index("idx_label_species_id").on(table.speciesId),
}));
// Label Sub-type Table
export const labelSubtype = pgTable("label_subtype", {
id: varchar("id", { length: 21 }).primaryKey(), // nanoid
labelId: varchar("label_id", { length: 21 }).notNull().references(() => label.id),
calltypeId: varchar("calltype_id", { length: 12 }).notNull().references(() => callType.id),
filterId: varchar("filter_id", { length: 12 }).references(() => filter.id),
certainty: decimal("certainty", { precision: 5, scale: 2 }),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
active: boolean("active").default(true),
});
// File Sizes Table
export const fileSize = pgTable("file_size", {
fileId: varchar("file_id", { length: 21 }).primaryKey().references(() => file.id),
fileSize: bigint("file_size", { mode: "number" }).notNull(),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
}, (table) => ({
fileIdx: index("idx_file_size_file").on(table.fileId),
}));
// Species to Dataset junction table
export const speciesDataset = pgTable("species_dataset", {
speciesId: varchar("species_id", { length: 12 }).notNull().references(() => species.id),
datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
}, (table) => ({
pk: primaryKey({columns: [table.speciesId, table.datasetId]}),
speciesIdx: index("idx_species_dataset_species").on(table.speciesId),
datasetIdx: index("idx_species_dataset_dataset").on(table.datasetId),
}));
// Access Grant Table
export const accessGrant = pgTable("access_grant", {
id: varchar("id", { length: 12 }).primaryKey(), // nanoid()
datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
role: roleNameEnum("role").notNull(),
permission: permissionNameEnum("permission").notNull(),
userId: varchar("user_id", { length: 64 }),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
active: boolean("active").default(true),
}, (table) => ({
uniqueAccessGrant: unique().on(table.datasetId, table.role, table.permission, table.userId),
uniqueRolePermission: uniqueIndex("idx_unique_role_permission")
.on(table.datasetId, table.role, table.permission)
.where(sql`user_id IS NULL`),
datasetIdx: index("idx_access_grant_dataset").on(table.datasetId),
roleIdx: index("idx_access_grant_role").on(table.role),
permissionIdx: index("idx_access_grant_permission").on(table.permission),
userIdx: index("idx_access_grant_user").on(table.userId),
activeIdx: index("idx_access_grant_active").on(table.active),
}));
// User Role Table
export const userRole = pgTable("user_role", {
userId: varchar("user_id", { length: 64 }).primaryKey(),
role: roleNameEnum("role").notNull().default("USER"),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
createdBy: varchar("created_by", { length: 64 }).notNull(),
lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
}, (table) => ({
userRoleIdx: index("idx_user_role").on(table.userId),
}));
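Since the latitude/longitude CHECK constraints and the GIN indexes noted at the top of this schema are not expressed in Drizzle, that validation has to happen before data reaches the database. In these Julia migration scripts a quick guard could look like this (hedged sketch; location.tsv is the same export read earlier):
using DataFrames, CSV
valid_lat(x) = ismissing(x) || -90.0 <= x <= 90.0
valid_lon(x) = ismissing(x) || -180.0 <= x <= 180.0
location = CSV.read("location.tsv", DataFrame)
bad = filter(row -> !(valid_lat(row.latitude) && valid_lon(row.longitude)), location)
nrow(bad) == 0 || error("out-of-range coordinates in location.tsv")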
-- NOTE: DBML does not like functions and materialised views
-- sql2dbml schema.sql --postgres -o schema.dbml
-- no need to keep a storage url for files, pattern is $prefix/$file_id.opus,
-- this way can change provider anytime.
-- store files and selections on aws, to train models
-- 1 bucket for files, 1 bucket for selections, random 12 digit prefix to filename
-- save files as opus
-- save selections as wav to prevent over compression
-- junction table for files-dataset relationship
-- (file can be a member of many datasets)
-- (selections apply only to 1 dataset)
-- use suncalc.js
-- could use a function for night, on client not db, but want to filter on night
-- in file table use mid point of file as time
-- all times must be zoned to utc
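A hedged Julia sketch of the two conventions above (helper names and example values are made up):
using Dates, TimeZones
# object storage key: "$storage_prefix/$file_id.opus" for files, ".wav" for selections
object_key(storage_prefix, file_id; ext = "opus") = "$storage_prefix/$file_id.$ext"
# file table time: mid point of the recording, zoned to UTC
file_midpoint_utc(start_local::ZonedDateTime, duration_s::Real) = astimezone(start_local + Millisecond(round(Int, duration_s * 500)), tz"UTC")
# e.g. a 15 minute recording starting 03:30 local
object_key("ab12cd34ef56/q7r8s9t0u1v2w3x4y5z6a", "V1StGXR8_Z5jdHi6B-myT")
file_midpoint_utc(ZonedDateTime(2021, 11, 24, 3, 30, tz"Pacific/Auckland"), 900.0)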
-- dataset type enum
CREATE TYPE dataset_type AS ENUM ('organise', 'test', 'train');
-- Dataset Table
-- Add type column to the dataset table, so that I do not ever mix testing data with training data.
CREATE TABLE dataset (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
name VARCHAR(255) NOT NULL,
public BOOLEAN DEFAULT FALSE, -- Whether dataset is publicly accessible
description VARCHAR(255),
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
owner VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
type dataset_type NOT NULL DEFAULT 'organise'
);
CREATE INDEX idx_dataset_name ON dataset(name);
CREATE INDEX idx_dataset_owner ON dataset(owner);
CREATE INDEX idx_dataset_active ON dataset(active);
CREATE INDEX idx_dataset_public ON dataset(public);
-- 2025-04-09 07:41:08.35534+00 and 2025-04-09 07:43:45.115101+00 -- just to test out data
-- Locations Table
CREATE TABLE location (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
name VARCHAR(140) NOT NULL,
latitude DECIMAL(10, 7) CHECK (latitude BETWEEN -90.0 AND 90.0), -- -45.5027
longitude DECIMAL(10, 7) CHECK (longitude BETWEEN -180.0 AND 180.0), -- 167.48406
description VARCHAR(255), -- Limited to 255 characters for efficiency
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
CREATE INDEX idx_location_name ON location(name);
CREATE INDEX idx_location_dataset ON location(dataset_id); -- ??
CREATE INDEX idx_location_active ON location(active); -- ??
CREATE INDEX idx_location_dataset_active ON location(dataset_id, active);
-- Add recording pattern, mainly so it can be searchable; this is an optional field.
-- AudioMoth needs this, to help with searching and filtering.
-- I have 24/7, 1-in-30 and 1-in-40 patterns.
CREATE TABLE cyclic_recording_pattern (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
record_s INTEGER NOT NULL,
sleep_s INTEGER NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL
);
-- Cluster Table (think of a cluster of files as all the files on 1 SD Card)
-- a statistical unit with no major time gaps, to enable call rate stats
-- See changes below, added foreign key on location, added recording pattern
-- added timezone_id for iana timezone id
CREATE TABLE cluster (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
location_id VARCHAR(12) NOT NULL, -- A cluster must have a location, as well as a dataset
name VARCHAR(140) NOT NULL,
description VARCHAR(255), -- Limited to 255 characters for efficiency
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
timezone_id VARCHAR(40), -- not required as may need to auto generate cluster, can't always ask or check
cyclic_recording_pattern_id VARCHAR(12),
FOREIGN KEY (dataset_id) REFERENCES dataset(id),
FOREIGN KEY (location_id) REFERENCES location(id),
FOREIGN KEY (cyclic_recording_pattern_id) REFERENCES cyclic_recording_pattern(id)
);
CREATE INDEX idx_cluster_dataset ON cluster(dataset_id);
CREATE INDEX idx_cluster_active ON cluster(active);
CREATE INDEX idx_cluster_dataset_active ON cluster(dataset_id, active);
CREATE INDEX idx_cluster_recording_pattern ON cluster(cyclic_recording_pattern_id);
-- values in my data are medium and medium-high
CREATE TYPE gain_level AS ENUM ('low', 'low-medium', 'medium', 'medium-high', 'high');
-- Files Table (removed dataset_id as now use junction table)
-- Note: timestamp_local should reflect local time, not the timestamp in the filename; this is important
-- duration must not have more than 3 decimal places, check in ui
CREATE TABLE file (
id VARCHAR(21) PRIMARY KEY, -- nanoid
file_name VARCHAR(255) NOT NULL,
path VARCHAR(255) NULL, -- optional hint for local access
xxh64_hash VARCHAR(16) NOT NULL, -- hash of original file cbe675a69a5fef1c
location_id VARCHAR(12) NOT NULL, -- nanoid, from locations table
timestamp_local TIMESTAMP WITH TIME ZONE NOT NULL, -- parsed from filename, adjust for daylight saving
cluster_id VARCHAR(12), -- nanoid(12), optional if imported one by one
duration DECIMAL(7, 3) NOT NULL CHECK (duration > 0), -- in seconds, allowing for millisecond precision (9999.999s)
sample_rate INTEGER NOT NULL,
description VARCHAR(255), -- Limited to 255 characters for efficiency
    storage_prefix VARCHAR(34) NOT NULL, -- prefix to compressed copy in object storage, nanoid(12) + '/' + nanoid(21) customAlphabet
upload BOOLEAN DEFAULT TRUE,
    maybe_solar_night BOOLEAN, --calculate with function on client, using the mid point of the file
    maybe_civil_night BOOLEAN, --calculate with function on client, using the mid point of the file
moon_phase DECIMAL(3,2) CHECK (moon_phase BETWEEN 0.00 AND 1.00), -- 0.00 to 1.00 (new moon to full moon)
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (location_id) REFERENCES location(id),
FOREIGN KEY (cluster_id) REFERENCES cluster(id)
);
CREATE INDEX idx_file_location ON file(location_id);
CREATE INDEX idx_file_active ON file(active);
CREATE INDEX idx_file_timestamp_local ON file(timestamp_local);
CREATE INDEX idx_file_cluster ON file(cluster_id);
CREATE INDEX idx_file_maybe_solar_night ON file(maybe_solar_night);
CREATE INDEX idx_file_maybe_civil_night ON file(maybe_civil_night);
CREATE TABLE moth_metadata (
file_id VARCHAR(21) PRIMARY KEY,
timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
recorder_id VARCHAR(16), -- 24F31901603710CD (16)
gain gain_level NULL, -- low, medium, high or null
battery_v DECIMAL(2, 1) CHECK (battery_v >= 0), -- for values from 0 to 9.9
temp_c DECIMAL(3, 1), -- e.g., 24.2
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
FOREIGN KEY (file_id) REFERENCES file(id)
);
CREATE TABLE file_metadata (
file_id VARCHAR(21) PRIMARY KEY,
json JSONB, -- For noise levels and other file-level metadata
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
FOREIGN KEY (file_id) REFERENCES file(id)
);
CREATE INDEX idx_file_metadata_json ON file_metadata USING gin(json);
-- Junction Table for Files to Dataset (many-to-many)
CREATE TABLE file_dataset (
file_id VARCHAR(21) NOT NULL,
dataset_id VARCHAR(12) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
PRIMARY KEY (file_id, dataset_id),
FOREIGN KEY (file_id) REFERENCES file(id),
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
-- indexes for the junction table
CREATE INDEX idx_file_dataset_file ON file_dataset(file_id);
CREATE INDEX idx_file_dataset_dataset ON file_dataset(dataset_id);
-- Selection Table
CREATE TABLE selection(
id VARCHAR(21) PRIMARY KEY, -- nanoid
file_id VARCHAR(21) NOT NULL, -- nanoid
dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
start_time DECIMAL(7,3) NOT NULL, --up to 9999.999 seconds
end_time DECIMAL(7,3) NOT NULL, -- up to 9999.999 seconds
freq_low DECIMAL(9,3) CHECK (freq_low < 300000), -- LOOK AT CHECK
freq_high DECIMAL(9,3) CHECK (freq_high < 300000), -- LOOK AT CHECK
description VARCHAR(255), -- Limited to 255 characters for efficiency
storage_prefix VARCHAR(34) NOT NULL, -- prefix to compressed copy in object storage, nanoid customAlphabet
upload BOOLEAN DEFAULT TRUE,
approved BOOLEAN DEFAULT FALSE, -- Approval status by dataset owner
    is_solar_night BOOLEAN, --calculate with function on client; this is a more accurate value than the file table one
    is_civil_night BOOLEAN, --calculate with function on client; this is a more accurate value than the file table one
moon_phase DECIMAL(3,2) CHECK (moon_phase BETWEEN 0.00 AND 1.00), -- 0.00 to 1.00 (new moon to full moon)
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (file_id) REFERENCES file(id),
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
CREATE INDEX idx_selection_file ON selection(file_id);
CREATE INDEX idx_selection_dataset ON selection(dataset_id);
CREATE INDEX idx_selection_active ON selection(active);
CREATE INDEX idx_selection_approved ON selection(approved);
CREATE INDEX idx_selection_is_solar_night ON selection(is_solar_night);
CREATE INDEX idx_selection_is_civil_night ON selection(is_civil_night);
CREATE INDEX idx_selection_moon_phase ON selection(moon_phase);
CREATE TABLE selection_metadata (
selection_id VARCHAR(21) PRIMARY KEY,
    json JSONB, -- for loudness, noise, and other selection-level metadata
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
FOREIGN KEY (selection_id) REFERENCES selection(id)
);
CREATE INDEX idx_selection_metadata_json ON selection_metadata USING gin(json);
-- eBird Taxonomy Table
-- will need to update INDEX too when introducing a new version
-- see working with ebird taxonomies, aichat, deepseek, macbook
-- see materialised view and index on it
-- see alter table stuff below, modifications
CREATE TABLE ebird_taxonomy (
id VARCHAR(12) PRIMARY KEY,
taxonomy_version VARCHAR(4) NOT NULL,
taxon_order INTEGER NOT NULL,
category VARCHAR(15) NOT NULL,
species_code VARCHAR(15) NOT NULL,
taxon_concept_id VARCHAR(15),
primary_com_name VARCHAR(100) NOT NULL,
sci_name VARCHAR(100) NOT NULL,
bird_order VARCHAR(30),
family VARCHAR(100),
species_group VARCHAR(100),
report_as VARCHAR(15),
valid_from DATE NOT NULL,
valid_to DATE,
UNIQUE (species_code, taxonomy_version)
);
-- Species Table (mutable), removed link to dataset_id and added owner field
CREATE TABLE species (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
label VARCHAR(100) NOT NULL, -- display label for the species
ebird_code VARCHAR(12), -- link to ebird taxonomy
taxonomy_version VARCHAR(4),
description VARCHAR(255),
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
owner VARCHAR(64) NOT NULL,
FOREIGN KEY (ebird_code, taxonomy_version) REFERENCES ebird_taxonomy(species_code, taxonomy_version)
);
CREATE INDEX idx_species_label ON species(label);
CREATE INDEX idx_species_ebird ON species(ebird_code);
-- Call Types Table (mutable)
CREATE TABLE call_type (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
species_id VARCHAR(12) NOT NULL, -- link to parent species
label VARCHAR(100) NOT NULL, -- display name like "male", "female", "duet"
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (species_id) REFERENCES species(id)
);
CREATE INDEX idx_call_type_species ON call_type(species_id);
CREATE INDEX idx_call_type_label ON call_type(label);
CREATE TABLE filter (
id VARCHAR(12) PRIMARY KEY, -- nanoid
name VARCHAR(140) NOT NULL,
description VARCHAR(255),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
owner VARCHAR(64) NOT NULL,
active BOOLEAN NOT NULL DEFAULT true
);
-- Label Table, many to 1 relationship with selection
CREATE TABLE label (
id VARCHAR(21) PRIMARY KEY, -- nanoid
selection_id VARCHAR(21) NOT NULL, -- link to selection table
species_id VARCHAR(12) NOT NULL, -- link to species table
filter_id VARCHAR(12),
certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN NOT NULL DEFAULT true,
FOREIGN KEY (selection_id) REFERENCES selection(id),
FOREIGN KEY (species_id) REFERENCES species(id),
FOREIGN KEY (filter_id) REFERENCES filter(id)
);
CREATE INDEX idx_label_selection_id ON label(selection_id);
CREATE INDEX idx_label_species_id ON label(species_id);
-- Label Sub-type Table (optional 1:1 relationship with label)
CREATE TABLE label_subtype (
id VARCHAR(21) PRIMARY KEY, -- nanoid
label_id VARCHAR(21) NOT NULL, -- link to parent label
calltype_id VARCHAR(12) NOT NULL, -- link to call_type table
filter_id VARCHAR(12),
certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN NOT NULL DEFAULT true,
FOREIGN KEY (label_id) REFERENCES label(id),
FOREIGN KEY (calltype_id) REFERENCES call_type(id),
FOREIGN KEY (filter_id) REFERENCES filter(id)
-- UNIQUE (label_id) -- would enforce a 1:1 relationship with label, but it is left out so a label can carry multiple call-type filters (1:many)
);
-- File Sizes Table (populated by batch job querying AWS)(mutable)
CREATE TABLE file_size (
file_id VARCHAR(21) PRIMARY KEY, -- nanoid, matching the files table
file_size BIGINT NOT NULL, -- size in bytes (using BIGINT just in case files > 2GB)
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (file_id) REFERENCES file(id)
);
CREATE INDEX idx_file_size_file ON file_size(file_id);
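-- A sketch (not part of the schema) of the upsert such a batch job could run after
-- reading object sizes from storage; the file_id below is hypothetical and would need
-- to exist in the file table, so the statement is left commented out:
-- INSERT INTO file_size (file_id, file_size)
-- VALUES ('hypothetical_file_id_', 123456789)
-- ON CONFLICT (file_id) DO UPDATE
--   SET file_size = EXCLUDED.file_size,
--       last_modified = CURRENT_TIMESTAMP;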
-- file_dataset must exist first
-- Referential Integrity for Selections
-- To ensure `selections.dataset_id` is valid for the associated file:
-- Add composite foreign key (requires file_dataset to exist first)
ALTER TABLE selection ADD CONSTRAINT fk_selection_file_dataset
FOREIGN KEY (file_id, dataset_id) REFERENCES file_dataset(file_id, dataset_id);
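-- A sketch of what this constraint prevents: an insert like the one below (all ids
-- hypothetical, so it is left commented out) is rejected unless the
-- (file_id, dataset_id) pair already exists in file_dataset:
-- INSERT INTO selection (id, file_id, dataset_id, start_time, end_time, storage_prefix, created_by, modified_by)
-- VALUES ('hypothetical_selection', 'hypothetical_file_id_', 'hypo_dataset', 0.000, 2.500, 'xyz', 'kp_example_user', 'kp_example_user');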
-- 2024 Taxonomy View, after populating underlying table
-- see working with ebird taxonomies, aichat, deepseek, macbook
-- I think I still need this to display the options when someone creates a species
CREATE MATERIALIZED VIEW ebird_taxonomy_v2024 AS
SELECT
id,
species_code,
primary_com_name,
sci_name,
bird_order, -- AS "order", order is reserved word in pgsql
family
FROM ebird_taxonomy
WHERE taxonomy_version = '2024';
-- to help with plain text search on common name and scientific name
CREATE INDEX idx_ebird_name_search ON ebird_taxonomy_v2024 USING gin(to_tsvector('english', primary_com_name || ' ' || sci_name));
CREATE INDEX ebird_taxonomy_species_code ON ebird_taxonomy_v2024(species_code);
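-- A sketch of the kind of lookup this supports when the species picker searches by
-- name; the search term is just an example, and the expression must match the index
-- definition above for the GIN index to be usable:
SELECT species_code, primary_com_name, sci_name
FROM ebird_taxonomy_v2024
WHERE to_tsvector('english', primary_com_name || ' ' || sci_name)
      @@ plainto_tsquery('english', 'morepork');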
-- Junction Table for Species to Dataset (many-to-many)
CREATE TABLE species_dataset (
species_id VARCHAR(21) NOT NULL, -- NB: should be VARCHAR(12) to match species.id
dataset_id VARCHAR(12) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
PRIMARY KEY (species_id, dataset_id),
FOREIGN KEY (species_id) REFERENCES species(id),
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
-- indexes for the junction table
CREATE INDEX idx_species_dataset_species ON species_dataset(species_id);
CREATE INDEX idx_species_dataset_dataset ON species_dataset(dataset_id);
-- Dataset level permissions
----------------------------
-- public switch in the dataset table remains unimplemented; default is private
-- check that this is secure; it is simple, though
-- Roles:
-- ADMIN role is granted to the paid tier: can create datasets, upload files, and so on
-- USER role is granted to the free tier: read only, except they can make new selections
-- Domain:Permission
-- Permission for a domain can be: Read, Manage, Delete
-- ?? not able to delete an access grant, can only modify it
-- ?? this way we maintain some semblance of a history
-- datasets are soft deleted therefore can be reactivated
-- role grants will then still exist
-- paid tier can be implemented with a permission in the Kinde token
-- Permissions are assigned to roles, and roles are assigned to users. This makes it easier to manage permissions for groups of users.
-- The `user_id` field in the `access_grant` table allows you to override the default permissions for a specific user. For example, you might give a specific user ADMIN access to a dataset, even if their general role is USER. If the `user_id` is null, that means the role is applied across the dataset for all users with that role.
-- Access is controlled at the dataset level. A user's ability to perform actions (read, manage, delete) depends on the permissions granted to their role (or to them directly) for a *specific* dataset.
-- The `active` flag in `access_grant` provides a soft-delete mechanism. Instead of deleting a grant, you set `active = FALSE`. This preserves the history of access grants and is crucial for auditing and compliance.
-- integrate Kinde authentication system to retrieve user roles and IDs. The Kinde token should contain information about the user's role. The application then uses the `user_id` from the token when querying the `access_grant` table.
-- Application code will need to perform checks against the `access_grant` table to determine if a user has the necessary permissions to perform an action on a specific dataset. This involves a function or middleware that intercepts requests and verifies permissions; see the sketch query after the access_grant indexes below.
-- Important: statement order matters (the ENUM types below must exist before the tables that use them)
-- ENUMs to enforce valid roles/permissions
CREATE TYPE role_name AS ENUM ('ADMIN', 'USER', 'CURATOR');
CREATE TYPE permission_name AS ENUM ('READ', 'UPLOAD', 'DOWNLOAD', 'EDIT', 'DELETE');
-- Access Grant Table (Associates roles, users, datasets, and permissions)
CREATE TABLE access_grant (
id VARCHAR(12) PRIMARY KEY, -- nanoid()
dataset_id VARCHAR(12) NOT NULL, --The dataset this grant applies to
role role_name NOT NULL, --The role this grant applies to
permission permission_name NOT NULL, --The permission being granted
user_id VARCHAR(64), --Optional, Grants permission to a specific user (overrides role). If `NULL`, the grant applies to *all* users with the specified role for the dataset.
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE, --allows you to effectively "revoke" access without deleting the grant record, preserving history
FOREIGN KEY (dataset_id) REFERENCES dataset(id),
CONSTRAINT unique_access_grant UNIQUE (dataset_id, role, permission, user_id) --prevents redundant or conflicting access grants
);
-- to prevent duplicate role-based permissions when user_id is NULL (the UNIQUE constraint above treats NULLs as distinct, so it cannot catch these duplicates)
CREATE UNIQUE INDEX idx_unique_role_permission
ON access_grant(dataset_id, role, permission)
WHERE user_id IS NULL;
CREATE INDEX idx_access_grant_dataset ON access_grant(dataset_id);
CREATE INDEX idx_access_grant_role ON access_grant(role);
CREATE INDEX idx_access_grant_permission ON access_grant(permission);
CREATE INDEX idx_access_grant_user ON access_grant(user_id);
CREATE INDEX idx_access_grant_active ON access_grant(active);
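-- A minimal sketch of the permission check described above, assuming the user id and
-- role come from the Kinde token; the dataset and user ids are example values that
-- match the inserts further down:
SELECT EXISTS (
  SELECT 1
  FROM access_grant
  WHERE dataset_id = 'Yx0oNUDmP5ch'                        -- dataset being accessed
    AND permission = 'READ'                                 -- action being attempted
    AND active                                              -- ignore revoked grants
    AND (user_id = 'kp_8663c3b3d4654734874b17d846bbe52d'    -- user-specific override
         OR (user_id IS NULL AND role = 'ADMIN'))           -- or role-wide grant
) AS has_permission;
-- Revoking is a soft delete, e.g. (grant id hypothetical, so left commented out):
-- UPDATE access_grant
-- SET active = FALSE, last_modified = CURRENT_TIMESTAMP, modified_by = 'kp_example_user'
-- WHERE id = 'hypothetical1';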
CREATE TABLE user_role (
user_id VARCHAR(64) PRIMARY KEY, -- kinde user_id, user can have 1 role only
role role_name NOT NULL DEFAULT 'USER',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL -- kinde user id
);
CREATE INDEX idx_user_role ON user_role(user_id);
-- Section below conflicts with sql2dbml (triggers excluded; they are not in the graph anyway)
--------------------------------------------------------------------------------
-- Function to update last_modified timestamp
-- CREATE OR REPLACE FUNCTION update_last_modified()
-- RETURNS TRIGGER AS $$
-- BEGIN
-- NEW.last_modified = CURRENT_TIMESTAMP;
-- RETURN NEW;
-- END;
-- $$ LANGUAGE plpgsql;
-- Trigger for file_size table
-- CREATE TRIGGER update_file_size_last_modified
-- BEFORE UPDATE ON file_size
-- FOR EACH ROW
-- EXECUTE FUNCTION update_last_modified();
-- Missing some triggers here; need a trigger for each of the following (see the sketch after this list):
-- dataset
-- location
-- cluster
-- file
-- moth_metadata
-- file_metadata
-- file_dataset
-- selection
-- selection_metadata
-- species
-- call_type
-- label
-- label_subtype
-- access_grant
-- astro
-- file_size (implemented above)
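-- A sketch of one of the missing triggers, following the same pattern as the
-- file_size trigger above (kept commented out for the same sql2dbml reason):
-- CREATE TRIGGER update_dataset_last_modified
-- BEFORE UPDATE ON dataset
-- FOR EACH ROW
-- EXECUTE FUNCTION update_last_modified();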
-- sql2dbml conflict ends
---------------------------------------------------------------------------
-- dataset type enum
CREATE TYPE dataset_type AS ENUM ('organise', 'test', 'train');
-- Dataset Table
CREATE TABLE dataset (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
name VARCHAR(255) NOT NULL,
public BOOLEAN DEFAULT FALSE, -- Whether dataset is publicly accessible
description VARCHAR(255),
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
owner VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
type dataset_type NOT NULL DEFAULT 'organise'
);
CREATE INDEX idx_dataset_name ON dataset(name);
CREATE INDEX idx_dataset_owner ON dataset(owner);
CREATE INDEX idx_dataset_active ON dataset(active);
CREATE INDEX idx_dataset_public ON dataset(public);
SELECT NOW();
-- 2025-04-09 07:41:08.35534+00 enum, table, indexes
-- add dataset [x]
-- copy csv into duckdb using cli if needed to sanitise
-- COPY dataset FROM 'dataset.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY dataset TO 'dataset.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy dataset FROM 'dataset.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 07:43:45.115101+00
-- Locations Table
CREATE TABLE location (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
name VARCHAR(140) NOT NULL,
latitude DECIMAL(10, 7) CHECK (latitude BETWEEN -90.0 AND 90.0), -- -45.5027
longitude DECIMAL(10, 7) CHECK (longitude BETWEEN -180.0 AND 180.0), -- 167.48406
description VARCHAR(255), -- Limited to 255 characters for efficiency
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
SELECT NOW();
-- 2025-04-09 15:11:48.100717+00
CREATE INDEX idx_location_name ON location(name);
CREATE INDEX idx_location_dataset ON location(dataset_id); -- ??
CREATE INDEX idx_location_active ON location(active); -- ??
CREATE INDEX idx_location_dataset_active ON location(dataset_id, active);
SELECT NOW();
-- 2025-04-09 15:13:04.925533+00
CREATE TABLE cyclic_recording_pattern (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
record_s INTEGER NOT NULL,
sleep_s INTEGER NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL
);
SELECT NOW();
-- 2025-04-09 15:13:45.288681+00
CREATE TABLE cluster (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
location_id VARCHAR(12) NOT NULL, -- A cluster must have a location, as well as a dataset
name VARCHAR(140) NOT NULL,
description VARCHAR(255), -- Limited to 255 characters for efficiency
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
timezone_id VARCHAR(40), -- not required, as clusters may need to be auto-generated and we can't always ask or check
cyclic_recording_pattern_id VARCHAR(12),
FOREIGN KEY (dataset_id) REFERENCES dataset(id),
FOREIGN KEY (location_id) REFERENCES location(id),
FOREIGN KEY (cyclic_recording_pattern_id) REFERENCES cyclic_recording_pattern(id)
);
SELECT NOW();
--2025-04-09 15:15:26.042802+00
CREATE INDEX idx_cluster_dataset ON cluster(dataset_id);
CREATE INDEX idx_cluster_active ON cluster(active);
CREATE INDEX idx_cluster_dataset_active ON cluster(dataset_id, active);
CREATE INDEX idx_cluster_recording_pattern ON cluster(cyclic_recording_pattern_id);
SELECT NOW();
-- 2025-04-09 15:16:21.964596+00
-- values in my data are medium and medium-high
CREATE TYPE gain_level AS ENUM ('low', 'low-medium', 'medium', 'medium-high', 'high');
SELECT NOW();
-- 2025-04-09 15:17:52.262936+00
CREATE TABLE file (
id VARCHAR(21) PRIMARY KEY, -- nanoid
file_name VARCHAR(255) NOT NULL,
path VARCHAR(255) NULL, -- optional hint for local access
xxh64_hash VARCHAR(16) NOT NULL, -- hash of original file cbe675a69a5fef1c
location_id VARCHAR(12) NOT NULL, -- nanoid, from locations table
timestamp_local TIMESTAMP WITH TIME ZONE NOT NULL, -- parsed from filename, adjust for daylight saving
cluster_id VARCHAR(12), -- nanoid(12), optional if imported one by one
duration DECIMAL(7, 3) NOT NULL CHECK (duration > 0), -- in seconds, allowing for millisecond precision (9999.999s)
sample_rate INTEGER NOT NULL,
description VARCHAR(255), -- Limited to 255 characters for efficiency
storage_prefix VARCHAR(34) NOT NULL, -- prefix to compressed copy in object storage, nanoid(3) customAlphabet
upload BOOLEAN DEFAULT TRUE,
maybe_solar_night BOOLEAN, -- calculated with a function on the client; approximate at the file level (the selection-level value is more accurate)
maybe_civil_night BOOLEAN, -- calculated with a function on the client; approximate at the file level (the selection-level value is more accurate)
moon_phase DECIMAL(3,2) CHECK (moon_phase BETWEEN 0.00 AND 1.00), -- 0.00 to 1.00 (new moon to full moon)
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (location_id) REFERENCES location(id),
FOREIGN KEY (cluster_id) REFERENCES cluster(id)
);
SELECT NOW();
-- 2025-04-09 15:39:18.281913+00
CREATE INDEX idx_file_location ON file(location_id);
CREATE INDEX idx_file_active ON file(active);
CREATE INDEX idx_file_timestamp_local ON file(timestamp_local);
CREATE INDEX idx_file_cluster ON file(cluster_id);
CREATE INDEX idx_file_maybe_solar_night ON file(maybe_solar_night);
CREATE INDEX idx_file_maybe_civil_night ON file(maybe_civil_night);
SELECT NOW();
-- 2025-04-09 15:40:07.82481+00
CREATE TABLE moth_metadata (
file_id VARCHAR(21) PRIMARY KEY,
timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
recorder_id VARCHAR(16), -- 24F31901603710CD (16)
gain gain_level NULL, -- low, medium, high or null
battery_v DECIMAL(2, 1) CHECK (battery_v >= 0), -- for values from 0 to 9.9
temp_c DECIMAL(3, 1), -- e.g., 24.2
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
FOREIGN KEY (file_id) REFERENCES file(id)
);
SELECT NOW();
-- 2025-04-09 15:40:51.607083+00
CREATE TABLE file_metadata (
file_id VARCHAR(21) PRIMARY KEY,
json JSONB, -- For noise levels and other file-level metadata
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
FOREIGN KEY (file_id) REFERENCES file(id)
);
SELECT NOW();
-- 2025-04-09 15:41:54.236904+00
CREATE INDEX idx_file_metadata_json ON file_metadata USING gin(json);
SELECT NOW();
-- 2025-04-09 15:42:39.410136+00
-- Junction Table for Files to Dataset (many-to-many)
CREATE TABLE file_dataset (
file_id VARCHAR(21) NOT NULL,
dataset_id VARCHAR(12) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
PRIMARY KEY (file_id, dataset_id),
FOREIGN KEY (file_id) REFERENCES file(id),
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
SELECT NOW();
-- 2025-04-09 15:43:13.267234+00
-- indexes for the junction table
CREATE INDEX idx_file_dataset_file ON file_dataset(file_id);
CREATE INDEX idx_file_dataset_dataset ON file_dataset(dataset_id);
SELECT NOW();
-- 2025-04-09 15:43:41.62687+00
-- Selection Table
CREATE TABLE selection(
id VARCHAR(21) PRIMARY KEY, -- nanoid
file_id VARCHAR(21) NOT NULL, -- nanoid
dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
start_time DECIMAL(7,3) NOT NULL, --up to 9999.999 seconds
end_time DECIMAL(7,3) NOT NULL, -- up to 9999.999 seconds
freq_low DECIMAL(9,3) CHECK (freq_low < 300000), -- LOOK AT CHECK
freq_high DECIMAL(9,3) CHECK (freq_high < 300000), -- LOOK AT CHECK
description VARCHAR(255), -- Limited to 255 characters for efficiency
storage_prefix VARCHAR(34) NOT NULL, -- prefix to compressed copy in object storage, nanoid customAlphabet
upload BOOLEAN DEFAULT TRUE,
approved BOOLEAN DEFAULT FALSE, -- Approval status by dataset owner
is_solar_night BOOLEAN, -- calculated with a function on the client; a more accurate value than the file-level one
is_civil_night BOOLEAN, -- calculated with a function on the client; a more accurate value than the file-level one
moon_phase DECIMAL(3,2) CHECK (moon_phase BETWEEN 0.00 AND 1.00), -- 0.00 to 1.00 (new moon to full moon)
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (file_id) REFERENCES file(id),
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
SELECT NOW();
-- 2025-04-09 15:44:10.434171+00
CREATE INDEX idx_selection_file ON selection(file_id);
CREATE INDEX idx_selection_dataset ON selection(dataset_id);
CREATE INDEX idx_selection_active ON selection(active);
CREATE INDEX idx_selection_approved ON selection(approved);
CREATE INDEX idx_selection_is_solar_night ON selection(is_solar_night);
CREATE INDEX idx_selection_is_civil_night ON selection(is_civil_night);
CREATE INDEX idx_selection_moon_phase ON selection(moon_phase);
SELECT NOW();
-- 2025-04-09 15:44:43.419907+00
CREATE TABLE selection_metadata (
selection_id VARCHAR(21) PRIMARY KEY,
json JSONB, -- for loudness, noise, and other selection-level metadata
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
FOREIGN KEY (selection_id) REFERENCES selection(id)
);
SELECT NOW();
-- 2025-04-09 15:45:28.370066+00
CREATE INDEX idx_selection_metadata_json ON selection_metadata USING gin(json);
SELECT NOW();
-- 2025-04-09 15:45:52.629629+00
-- eBird Taxonomy Table
-- will need to update INDEX too when introducing a new version
-- see working with ebird taxonomies, aichat, deepseek, macbook
-- see materialised view and index on it
CREATE TABLE ebird_taxonomy (
id VARCHAR(12) PRIMARY KEY,
taxonomy_version VARCHAR(4) NOT NULL,
taxon_order INTEGER NOT NULL,
category VARCHAR(15) NOT NULL,
species_code VARCHAR(15) NOT NULL,
taxon_concept_id VARCHAR(15),
primary_com_name VARCHAR(100) NOT NULL,
sci_name VARCHAR(100) NOT NULL,
bird_order VARCHAR(30),
family VARCHAR(100),
species_group VARCHAR(100),
report_as VARCHAR(15),
valid_from DATE NOT NULL,
valid_to DATE,
UNIQUE (species_code, taxonomy_version)
);
SELECT NOW();
-- 2025-04-09 15:46:34.74185+00
-- Species Table (mutable), removed link to dataset_id and added owner field
CREATE TABLE species (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
label VARCHAR(100) NOT NULL, -- display label for the species
ebird_code VARCHAR(12), -- link to ebird taxonomy
taxonomy_version VARCHAR(4),
description VARCHAR(255),
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
owner VARCHAR(64) NOT NULL,
FOREIGN KEY (ebird_code, taxonomy_version) REFERENCES ebird_taxonomy(species_code, taxonomy_version)
);
SELECT NOW();
-- 2025-04-09 15:47:03.599141+00
CREATE INDEX idx_species_label ON species(label);
CREATE INDEX idx_species_ebird ON species(ebird_code);
SELECT NOW();
-- 2025-04-09 15:47:30.319107+00
-- Call Types Table (mutable)
CREATE TABLE call_type (
id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
species_id VARCHAR(12) NOT NULL, -- link to parent species
label VARCHAR(100) NOT NULL, -- display name like "male", "female", "duet"
created_by VARCHAR(64) NOT NULL, -- kinde user id
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE,
FOREIGN KEY (species_id) REFERENCES species(id)
);
SELECT NOW();
-- 2025-04-09 15:47:57.081879+00
CREATE INDEX idx_call_type_species ON call_type(species_id);
CREATE INDEX idx_call_type_label ON call_type(label);
SELECT NOW();
-- 2025-04-09 15:48:24.381661+00
CREATE TABLE filter (
id VARCHAR(12) PRIMARY KEY, -- nanoid
name VARCHAR(140) NOT NULL,
description VARCHAR(255),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
owner VARCHAR(64) NOT NULL,
active BOOLEAN NOT NULL DEFAULT true
);
SELECT NOW();
-- 2025-04-09 15:49:27.103164+00
-- Label Table, many-to-1 relationship with selection
CREATE TABLE label (
id VARCHAR(21) PRIMARY KEY, -- nanoid
selection_id VARCHAR(21) NOT NULL, -- link to selection table
species_id VARCHAR(12) NOT NULL, -- link to species table
filter_id VARCHAR(12),
certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN NOT NULL DEFAULT true,
FOREIGN KEY (selection_id) REFERENCES selection(id),
FOREIGN KEY (species_id) REFERENCES species(id),
FOREIGN KEY (filter_id) REFERENCES filter(id)
);
SELECT NOW();
-- 2025-04-09 15:50:01.91386+00
CREATE INDEX idx_label_selection_id ON label(selection_id);
CREATE INDEX idx_label_species_id ON label(species_id);
SELECT NOW();
-- 2025-04-09 15:50:21.256066+0
-- Label Sub-type Table (optional; a label may have multiple sub-types)
CREATE TABLE label_subtype (
id VARCHAR(21) PRIMARY KEY, -- nanoid
label_id VARCHAR(21) NOT NULL, -- link to parent label
calltype_id VARCHAR(12) NOT NULL, -- link to call_type table
filter_id VARCHAR(12),
certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN NOT NULL DEFAULT true,
FOREIGN KEY (label_id) REFERENCES label(id),
FOREIGN KEY (calltype_id) REFERENCES call_type(id),
FOREIGN KEY (filter_id) REFERENCES filter(id)
);
SELECT NOW();
-- 2025-04-09 15:51:03.626905+00
-- File Sizes Table (populated by batch job querying AWS)(mutable)
CREATE TABLE file_size (
file_id VARCHAR(21) PRIMARY KEY, -- nanoid, matching the files table
file_size BIGINT NOT NULL, -- size in bytes (using BIGINT just in case files > 2GB)
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (file_id) REFERENCES file(id)
);
SELECT NOW();
-- 2025-04-09 15:51:33.005948+00
CREATE INDEX idx_file_size_file ON file_size(file_id);
SELECT NOW();
-- 2025-04-09 15:52:01.304821+00
-- file_dataset must exist first
-- Referential Integrity for Selections
-- To ensure `selections.dataset_id` is valid for the associated file:
-- Add composite foreign key (requires file_dataset to exist first)
ALTER TABLE selection ADD CONSTRAINT fk_selection_file_dataset
FOREIGN KEY (file_id, dataset_id) REFERENCES file_dataset(file_id, dataset_id);
SELECT NOW();
-- 2025-04-09 15:52:42.744196+00
\COPY ebird_taxonomy FROM 'eBird_taxonomy_v2024.tsv' WITH (FORMAT csv, DELIMITER E'\t', HEADER, QUOTE '"', ESCAPE '\', NULL '');
SELECT NOW();
-- 2025-04-09 15:54:42.886859+00
-- 2024 Taxonomy View, after populating underlying table
-- see working with ebird taxonomies, aichat, deepseek, macbook
-- I need this to display the options when someone creates a species
CREATE MATERIALIZED VIEW ebird_taxonomy_v2024 AS
SELECT
id,
species_code,
primary_com_name,
sci_name,
bird_order, -- AS "order", order is reserved word in pgsql
family
FROM ebird_taxonomy
WHERE taxonomy_version = '2024';
SELECT NOW();
-- 2025-04-09 16:02:51.057481+00
-- to help with plain text search on common name and scientific name
CREATE INDEX idx_ebird_name_search ON ebird_taxonomy_v2024 USING gin(to_tsvector('english', primary_com_name || ' ' || sci_name));
CREATE INDEX ebird_taxonomy_species_code ON ebird_taxonomy_v2024(species_code);
SELECT NOW();
-- 2025-04-09 16:03:26.640118+00
-- Junction Table for Species to Dataset (many-to-many)
CREATE TABLE species_dataset (
species_id VARCHAR(21) NOT NULL,
dataset_id VARCHAR(12) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
PRIMARY KEY (species_id, dataset_id),
FOREIGN KEY (species_id) REFERENCES species(id),
FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
SELECT NOW();
-- 2025-04-09 16:04:00.064375+00
-- indexes for the junction table
CREATE INDEX idx_species_dataset_species ON species_dataset(species_id);
CREATE INDEX idx_species_dataset_dataset ON species_dataset(dataset_id);
SELECT NOW();
-- 2025-04-09 16:04:23.530128+00
CREATE TYPE role_name AS ENUM ('ADMIN', 'USER', 'CURATOR');
CREATE TYPE permission_name AS ENUM ('READ', 'UPLOAD', 'DOWNLOAD', 'EDIT', 'DELETE');
SELECT NOW();
-- 2025-04-09 16:05:18.144071+00
-- Access Grant Table (Associates roles, users, datasets, and permissions)
CREATE TABLE access_grant (
id VARCHAR(12) PRIMARY KEY, -- nanoid()
dataset_id VARCHAR(12) NOT NULL, --The dataset this grant applies to
role role_name NOT NULL, --The role this grant applies to
permission permission_name NOT NULL, --The permission being granted
user_id VARCHAR(64), --Optional, Grants permission to a specific user (overrides role). If `NULL`, the grant applies to *all* users with the specified role for the dataset.
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL, -- kinde user id
active BOOLEAN DEFAULT TRUE, --allows you to effectively "revoke" access without deleting the grant record, preserving history
FOREIGN KEY (dataset_id) REFERENCES dataset(id),
CONSTRAINT unique_access_grant UNIQUE (dataset_id, role, permission, user_id) --prevents redundant or conflicting access grants
);
SELECT NOW();
-- 2025-04-09 16:05:52.864153+00
-- to prevent duplicate role-based permissions when user_id is NULL (the UNIQUE constraint above treats NULLs as distinct, so it cannot catch these duplicates)
CREATE UNIQUE INDEX idx_unique_role_permission
ON access_grant(dataset_id, role, permission)
WHERE user_id IS NULL;
SELECT NOW();
-- 2025-04-09 16:06:26.067281+00
CREATE INDEX idx_access_grant_dataset ON access_grant(dataset_id);
CREATE INDEX idx_access_grant_role ON access_grant(role);
CREATE INDEX idx_access_grant_permission ON access_grant(permission);
CREATE INDEX idx_access_grant_user ON access_grant(user_id);
CREATE INDEX idx_access_grant_active ON access_grant(active);
SELECT NOW();
-- 2025-04-09 16:06:55.141239+00
CREATE TABLE user_role (
user_id VARCHAR(64) PRIMARY KEY, -- kinde user_id, user can have 1 role only
role role_name NOT NULL DEFAULT 'USER',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(64) NOT NULL, -- kinde user id
last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
modified_by VARCHAR(64) NOT NULL -- kinde user id
);
SELECT NOW();
-- 2025-04-09 16:07:16.366116+00
CREATE INDEX idx_user_role ON user_role(user_id);
SELECT NOW();
-- 2025-04-09 16:08:04.699188+00
-- add location [x]
-- copy csv into duckdb using cli if needed to sanitise
-- COPY location FROM 'location.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY location TO 'location.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy location FROM 'location.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 16:12:13.798421+00
-- add cyclic_recording_pattern [x]
-- copy csv into duckdb using cli if needed to sanitise
-- COPY cyclic_recording_pattern FROM 'cyclic_recording_pattern.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY cyclic_recording_pattern TO 'cyclic_recording_pattern.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy cyclic_recording_pattern FROM 'cyclic_recording_pattern.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 16:12:43.958352+00
-- add cluster [x]
-- copy csv into duckdb using cli if needed to sanitise
-- COPY cluster FROM 'cluster.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY cluster TO 'cluster.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy cluster FROM 'cluster.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 16:13:46.196433+00
-- add file
-- copy csv into duckdb using cli if needed to sanitise
-- COPY file FROM 'file.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY file TO 'file.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy file FROM 'file3.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 16:49:21.294746+00
-- add moth_metadata
-- copy csv into duckdb using cli if needed to sanitise
-- COPY moth_metadata FROM 'moth_metadata.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY moth_metadata TO 'moth_metadata.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy moth_metadata FROM 'moth_metadata.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:18:36.966633+00
-- add file_dataset
-- copy csv into duckdb using cli if needed to sanitise
-- COPY file_dataset FROM 'file_dataset.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY file_dataset TO 'file_dataset.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy file_dataset FROM 'file_dataset.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:37:03.178435+00
-- add species
-- copy csv into duckdb using cli if needed to sanitise
-- COPY species FROM 'species.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY species TO 'species.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy species FROM 'species.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:52:35.538073+00
-- add call_type
-- upload to neon
\copy call_type FROM 'call_type.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:53:24.384375+00
-- add filter data
-- upload to neon
\copy filter FROM 'filter.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:53:58.180513+00
-- file_metadata_avianz.tsv
\copy file_metadata FROM 'file_metadata_avianz.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:54:56.536164+00
\copy selection FROM 'selection_avianz.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:55:59.247693+00
\copy selection_metadata FROM 'selection_metadata_avianz.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:56:32.451374+00
\copy label FROM 'label_avianz.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:57:17.600579+00
\copy label_subtype FROM 'label_subtype_avianz.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:57:57.270427+00
\copy species_dataset FROM 'species_dataset.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 17:58:40.500378+00
-- selections data for pomona 20230418
\copy selection FROM 'selection_pomona_20230418.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 18:02:25.360178+00
\copy selection_metadata FROM 'selection_metadata_pomona_20230418.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 18:03:38.790581+00
\copy label FROM 'label_pomona_20230418.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 18:08:04.202488+00
\copy label_subtype FROM 'label_subtype_pomona_20230418.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 18:10:55.322432+00
-- selections data for pomona 20250304
\copy selection FROM 'selection_pomona_20250304.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 18:14:26.407255+00
\copy selection_metadata FROM 'selection_metadata_pomona_20250304.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 18:15:08.567327+00
\copy label FROM 'label_pomona_20250304.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 18:18:05.112561+00
\copy label_subtype FROM 'label_subtype_pomona_20250304.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW();
-- 2025-04-09 18:21:01.12717+00
INSERT INTO access_grant (
id,
dataset_id,
role,
permission,
user_id,
created_at,
created_by,
last_modified,
modified_by,
active
) VALUES
('5HCmMIimQMcD', 'Yx0oNUDmP5ch', 'ADMIN', 'READ', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('_Z-XdwNTvDFg', 'Yx0oNUDmP5ch', 'ADMIN', 'UPLOAD', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('CWZyEI1Oa1i', 'Yx0oNUDmP5ch', 'ADMIN', 'DOWNLOAD', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('-Ohke6B7z6Ht', 'Yx0oNUDmP5ch', 'ADMIN', 'EDIT', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('5ske4RlY2xjJ', 'Yx0oNUDmP5ch', 'ADMIN', 'DELETE', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('zJjp5lmZid0e', 'gljgxDbfasva', 'ADMIN', 'READ', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('fEJysuxie266', 'gljgxDbfasva', 'ADMIN', 'UPLOAD', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('hz-hnQ3AScnV', 'gljgxDbfasva', 'ADMIN', 'DOWNLOAD', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('ob4WillMpLnV', 'gljgxDbfasva', 'ADMIN', 'EDIT', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('qd5ABiggu2np', 'gljgxDbfasva', 'ADMIN', 'DELETE', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true);
SELECT NOW();
-- 2025-04-09 18:22:12.620392+00
INSERT INTO user_role (
user_id,
role,
created_at,
created_by,
last_modified,
modified_by
) VALUES
('kp_8663c3b3d4654734874b17d846bbe52d', 'ADMIN', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d');
SELECT NOW();
-- 2025-04-09 18:22:38.804412+00
# ME
npm create cloudflare@latest skraak -- --template=cloudflare/templates/vite-react-template # for vite + react + cloudflare template
pijul init, add, record
claude
volta pin node@22
npm i drizzle-orm @neondatabase/serverless
npm i -D drizzle-kit postgres dotenv tsx
copied in .dev.vars with neon connection string
copied db/ in, including converted db/schema.ts
added api/datasets route
added a raw GET fetch to App.tsx to get datasets
cleaned out all unused code and assets, including SVGs
npm run deploy again to check it works on Cloudflare
fixed errors and deployed OK
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Commands
- Build: `npm run build` (TypeScript build + Vite build)
- Dev server: `npm run dev` (Vite dev server with HMR)
- Lint: `npm run lint` (ESLint)
- Type check: `tsc -b` (TypeScript build checking)
- Deploy: `npm run deploy` (builds and deploys to Cloudflare Workers)
- Preview: `npm run preview` (preview production build locally)
## Code Style Guidelines
- TypeScript strict mode with noUnusedLocals, noUnusedParameters flags
- React functional components with hooks (follow react-hooks plugin rules)
- Project structure: src/react-app (frontend), src/worker (Cloudflare Workers backend)
- Import order: React/external libraries first, then internal modules
- Naming: PascalCase for components, camelCase for functions/variables
- Error handling: use try/catch blocks for async operations
- Use Hono for backend API routes with typed request/response interfaces
- Avoid any type; use explicit type annotations for function parameters/returns
- Follow ESLint recommended configurations for JS and TypeScript