app.get("/api/", (c) => c.json({ name: "Cloudflare" }));
// API routes
app.get("/api", (c) => c.json({ status: "ok" }));

// Datasets API route
app.get("/api/datasets", async (c) => {
  try {
    const sql = neon(c.env.DATABASE_URL);
    const db = drizzle(sql);
    const results = await db.select().from(dataset);
    return c.json({ data: results });
  } catch (error) {
    console.error("Error fetching datasets:", error);
    return c.json(
      {
        error: "Failed to fetch datasets",
        details: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});
[React logo SVG asset]
[Hono logo SVG asset]
[Cloudflare logo SVG asset]
const [name, setName] = useState("unknown");
const [datasets, setDatasets] = useState<string>("No data loaded");
const [loading, setLoading] = useState<boolean>(false);
const [error, setError] = useState<string | null>(null);

const fetchDatasets = async () => {
  setLoading(true);
  setError(null);
  try {
    const response = await fetch("/api/datasets");
    if (!response.ok) {
      throw new Error(`HTTP error! Status: ${response.status}`);
    }
    const data = (await response.json()) as DatasetsResponse;
    setDatasets(JSON.stringify(data, null, 2));
  } catch (err) {
    setError(err instanceof Error ? err.message : "Failed to fetch datasets");
    console.error("Error fetching datasets:", err);
  } finally {
    setLoading(false);
  }
};
<div>
  <a href="https://vite.dev" target="_blank">
    <img src={viteLogo} className="logo" alt="Vite logo" />
  </a>
  <a href="https://react.dev" target="_blank">
    <img src={reactLogo} className="logo react" alt="React logo" />
  </a>
  <a href="https://hono.dev/" target="_blank">
    <img src={honoLogo} className="logo cloudflare" alt="Hono logo" />
  </a>
  <a href="https://workers.cloudflare.com/" target="_blank">
    <img
      src={cloudflareLogo}
      className="logo cloudflare"
      alt="Cloudflare logo"
    />
  </a>
</div>
<h1>Vite + React + Hono + Cloudflare</h1>
<button
  onClick={() => {
    fetch("/api/")
      .then((res) => res.json() as Promise<{ name: string }>)
      .then((data) => setName(data.name));
  }}
  aria-label="get name"
<h2>Datasets</h2>
<button
  onClick={fetchDatasets}
  disabled={loading}
  aria-label="fetch datasets"
.error {
  color: #e53935;
  background-color: #ffebee;
  padding: 8px;
  border-radius: 4px;
  margin: 8px 0;
}
[Vite logo SVG asset]
"cloudflare": {"label": "Vite React Template","products": ["Workers"],"categories": [],"icon_urls": ["https://imagedelivery.net/wSMYJvS3Xw-n339CbDyDIA/5ca0ca32-e897-4699-d4c1-6b680512f000/public"],"preview_image_url": "https://imagedelivery.net/wSMYJvS3Xw-n339CbDyDIA/fc7b4b62-442b-4769-641b-ad4422d74300/public","dash": true},
},"node_modules/@drizzle-team/brocli": {"version": "0.10.2","resolved": "https://registry.npmjs.org/@drizzle-team/brocli/-/brocli-0.10.2.tgz","integrity": "sha512-z33Il7l5dKjUgGULTqBsQBQwckHh5AbIuxhdsIxDDiZAzBOrZO6q9ogcWC65kU382AfynTfgNumVcNIjuIua6w==","dev": true,"license": "Apache-2.0"
}},"node_modules/@esbuild-kit/core-utils": {"version": "3.3.2","resolved": "https://registry.npmjs.org/@esbuild-kit/core-utils/-/core-utils-3.3.2.tgz","integrity": "sha512-sPRAnw9CdSsRmEtnsl2WXWdyquogVpB3yZ3dgwJfe8zrOzTsV7cJvmwrKVa+0ma5BoiGJ+BoqkMvawbayKUsqQ==","deprecated": "Merged into tsx: https://tsx.is","dev": true,"license": "MIT","dependencies": {"esbuild": "~0.18.20","source-map-support": "^0.5.21"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/android-arm": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.18.20.tgz","integrity": "sha512-fyi7TDI/ijKKNZTUJAQqiG5T7YjJXgnzkURqmGj13C6dCqckZBLdl4h7bkhHt/t0WP+zO9/zwroDvANaOqO5Sw==","cpu": ["arm"],"dev": true,"license": "MIT","optional": true,"os": ["android"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/android-arm64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.18.20.tgz","integrity": "sha512-Nz4rJcchGDtENV0eMKUNa6L12zz2zBDXuhj/Vjh18zGqB44Bi7MBMSXjgunJgjRhCmKOjnPuZp4Mb6OKqtMHLQ==","cpu": ["arm64"],"dev": true,"license": "MIT","optional": true,"os": ["android"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/android-x64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.18.20.tgz","integrity": "sha512-8GDdlePJA8D6zlZYJV/jnrRAi6rOiNaCC/JclcXpB+KIuvfBN4owLtgzY2bsxnx666XjJx2kDPUmnTtR8qKQUg==","cpu": ["x64"],"dev": true,"license": "MIT","optional": true,"os": ["android"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/darwin-arm64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.18.20.tgz","integrity": "sha512-bxRHW5kHU38zS2lPTPOyuyTm+S+eobPUnTNkdJEfAddYgEcll4xkT8DB9d2008DtTbl7uJag2HuE5NZAZgnNEA==","cpu": ["arm64"],"dev": true,"license": "MIT","optional": true,"os": ["darwin"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/darwin-x64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.18.20.tgz","integrity": "sha512-pc5gxlMDxzm513qPGbCbDukOdsGtKhfxD1zJKXjCCcU7ju50O7MeAZ8c4krSJcOIJGFR+qx21yMMVYwiQvyTyQ==","cpu": ["x64"],"dev": true,"license": "MIT","optional": true,"os": ["darwin"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/freebsd-arm64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.18.20.tgz","integrity": "sha512-yqDQHy4QHevpMAaxhhIwYPMv1NECwOvIpGCZkECn8w2WFHXjEwrBn3CeNIYsibZ/iZEUemj++M26W3cNR5h+Tw==","cpu": ["arm64"],"dev": true,"license": "MIT","optional": true,"os": ["freebsd"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/freebsd-x64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.18.20.tgz","integrity": "sha512-tgWRPPuQsd3RmBZwarGVHZQvtzfEBOreNuxEMKFcd5DaDn2PbBxfwLcj4+aenoh7ctXcbXmOQIn8HI6mCSw5MQ==","cpu": ["x64"],"dev": true,"license": "MIT","optional": true,"os": ["freebsd"],"engines": {"node": ">=12"
"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-arm": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.18.20.tgz","integrity": "sha512-/5bHkMWnq1EgKr1V+Ybz3s1hWXok7mDFUMQ4cG10AfW3wL02PSZi5kFpYKrptDsgb2WAJIvRcDm+qIvXf/apvg==","cpu": ["arm"],"dev": true,"license": "MIT","optional": true,"os": ["linux"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-arm64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.18.20.tgz","integrity": "sha512-2YbscF+UL7SQAVIpnWvYwM+3LskyDmPhe31pE7/aoTMFKKzIc9lLbyGUpmmb8a8AixOL61sQ/mFh3jEjHYFvdA==","cpu": ["arm64"],"dev": true,"license": "MIT","optional": true,"os": ["linux"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-ia32": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.18.20.tgz","integrity": "sha512-P4etWwq6IsReT0E1KHU40bOnzMHoH73aXp96Fs8TIT6z9Hu8G6+0SHSw9i2isWrD2nbx2qo5yUqACgdfVGx7TA==","cpu": ["ia32"],"dev": true,"license": "MIT","optional": true,"os": ["linux"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-loong64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.18.20.tgz","integrity": "sha512-nXW8nqBTrOpDLPgPY9uV+/1DjxoQ7DoB2N8eocyq8I9XuqJ7BiAMDMf9n1xZM9TgW0J8zrquIb/A7s3BJv7rjg==","cpu": ["loong64"],"dev": true,"license": "MIT","optional": true,"os": ["linux"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-mips64el": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.18.20.tgz","integrity": "sha512-d5NeaXZcHp8PzYy5VnXV3VSd2D328Zb+9dEq5HE6bw6+N86JVPExrA6O68OPwobntbNJ0pzCpUFZTo3w0GyetQ==","cpu": ["mips64el"],"dev": true,"license": "MIT","optional": true,"os": ["linux"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-ppc64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.18.20.tgz","integrity": "sha512-WHPyeScRNcmANnLQkq6AfyXRFr5D6N2sKgkFo2FqguP44Nw2eyDlbTdZwd9GYk98DZG9QItIiTlFLHJHjxP3FA==","cpu": ["ppc64"],"dev": true,"license": "MIT","optional": true,"os": ["linux"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-riscv64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.18.20.tgz","integrity": "sha512-WSxo6h5ecI5XH34KC7w5veNnKkju3zBRLEQNY7mv5mtBmrP/MjNBCAlsM2u5hDBlS3NGcTQpoBvRzqBcRtpq1A==","cpu": ["riscv64"],"dev": true,"license": "MIT","optional": true,"os": ["linux"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-s390x": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.18.20.tgz","integrity": "sha512-+8231GMs3mAEth6Ja1iK0a1sQ3ohfcpzpRLH8uuc5/KVDFneH6jtAJLFGafpzpMRO6DzJ6AvXKze9LfFMrIHVQ==","cpu": ["s390x"],"dev": true,"license": "MIT","optional": true,"os": ["linux"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/linux-x64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.18.20.tgz","integrity": "sha512-UYqiqemphJcNsFEskc73jQ7B9jgwjWrSayxawS6UVFZGWrAAtkzjxSqnoclCXxWtfwLdzU+vTpcNYhpn43uP1w==","cpu": ["x64"],"dev": 
true,"license": "MIT","optional": true,"os": ["linux"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/netbsd-x64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.18.20.tgz","integrity": "sha512-iO1c++VP6xUBUmltHZoMtCUdPlnPGdBom6IrO4gyKPFFVBKioIImVooR5I83nTew5UOYrk3gIJhbZh8X44y06A==","cpu": ["x64"],"dev": true,"license": "MIT","optional": true,"os": ["netbsd"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/openbsd-x64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.18.20.tgz","integrity": "sha512-e5e4YSsuQfX4cxcygw/UCPIEP6wbIL+se3sxPdCiMbFLBWu0eiZOJ7WoD+ptCLrmjZBK1Wk7I6D/I3NglUGOxg==","cpu": ["x64"],"dev": true,"license": "MIT","optional": true,"os": ["openbsd"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/sunos-x64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.18.20.tgz","integrity": "sha512-kDbFRFp0YpTQVVrqUd5FTYmWo45zGaXe0X8E1G/LKFC0v8x0vWrhOWSLITcCn63lmZIxfOMXtCfti/RxN/0wnQ==","cpu": ["x64"],"dev": true,"license": "MIT","optional": true,"os": ["sunos"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/win32-arm64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.18.20.tgz","integrity": "sha512-ddYFR6ItYgoaq4v4JmQQaAI5s7npztfV4Ag6NrhiaW0RrnOXqBkgwZLofVTlq1daVTQNhtI5oieTvkRPfZrePg==","cpu": ["arm64"],"dev": true,"license": "MIT","optional": true,"os": ["win32"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/win32-ia32": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.18.20.tgz","integrity": "sha512-Wv7QBi3ID/rROT08SABTS7eV4hX26sVduqDOTe1MvGMjNd3EjOz4b7zeexIR62GTIEKrfJXKL9LFxTYgkyeu7g==","cpu": ["ia32"],"dev": true,"license": "MIT","optional": true,"os": ["win32"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/@esbuild/win32-x64": {"version": "0.18.20","resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.18.20.tgz","integrity": "sha512-kTdfRcSiDfQca/y9QIkng02avJ+NCaQvrMejlsB3RRv5sE9rRoeBPISaZpKxHELzRxZyLvNts1P27W3wV+8geQ==","cpu": ["x64"],"dev": true,"license": "MIT","optional": true,"os": ["win32"],"engines": {"node": ">=12"}},"node_modules/@esbuild-kit/core-utils/node_modules/esbuild": {"version": "0.18.20","resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.18.20.tgz","integrity": "sha512-ceqxoedUrcayh7Y7ZX6NdbbDzGROiyVBgC4PriJThBKSVPWnnFHZAkfI1lJT8QFkOwH4qOS2SJkS4wvpGl8BpA==","dev": true,"hasInstallScript": true,"license": "MIT","bin": {"esbuild": "bin/esbuild"},"engines": {"node": ">=12"},"optionalDependencies": {"@esbuild/android-arm": "0.18.20","@esbuild/android-arm64": "0.18.20","@esbuild/android-x64": "0.18.20","@esbuild/darwin-arm64": "0.18.20","@esbuild/darwin-x64": "0.18.20","@esbuild/freebsd-arm64": "0.18.20","@esbuild/freebsd-x64": "0.18.20","@esbuild/linux-arm": "0.18.20","@esbuild/linux-arm64": "0.18.20","@esbuild/linux-ia32": "0.18.20","@esbuild/linux-loong64": "0.18.20","@esbuild/linux-mips64el": "0.18.20","@esbuild/linux-ppc64": "0.18.20","@esbuild/linux-riscv64": "0.18.20","@esbuild/linux-s390x": "0.18.20","@esbuild/linux-x64": "0.18.20","@esbuild/netbsd-x64": "0.18.20","@esbuild/openbsd-x64": "0.18.20","@esbuild/sunos-x64": 
"0.18.20","@esbuild/win32-arm64": "0.18.20","@esbuild/win32-ia32": "0.18.20","@esbuild/win32-x64": "0.18.20"}},"node_modules/@esbuild-kit/esm-loader": {"version": "2.6.5","resolved": "https://registry.npmjs.org/@esbuild-kit/esm-loader/-/esm-loader-2.6.5.tgz","integrity": "sha512-FxEMIkJKnodyA1OaCUoEvbYRkoZlLZ4d/eXFu9Fh8CbBBgP5EmZxrfTRyN0qpXZ4vOvqnE5YdRdcrmUUXuU+dA==","deprecated": "Merged into tsx: https://tsx.is","dev": true,"license": "MIT","dependencies": {"@esbuild-kit/core-utils": "^3.3.2","get-tsconfig": "^4.7.0"}},
"node_modules/@neondatabase/serverless": {"version": "1.0.0","resolved": "https://registry.npmjs.org/@neondatabase/serverless/-/serverless-1.0.0.tgz","integrity": "sha512-XWmEeWpBXIoksZSDN74kftfTnXFEGZ3iX8jbANWBc+ag6dsiQuvuR4LgB0WdCOKMb5AQgjqgufc0TgAsZubUYw==","license": "MIT","dependencies": {"@types/node": "^22.10.2","@types/pg": "^8.8.0"},"engines": {"node": ">=19.0.0"}},
},"node_modules/@types/node": {"version": "22.14.1","resolved": "https://registry.npmjs.org/@types/node/-/node-22.14.1.tgz","integrity": "sha512-u0HuPQwe/dHrItgHHpmw3N2fYCR6x4ivMNbPHRkBVP4CvN+kiRrKHWk3i8tXiO/joPwXLMYvF9TTF0eqgHIuOw==","license": "MIT","dependencies": {"undici-types": "~6.21.0"}},"node_modules/@types/pg": {"version": "8.11.13","resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.11.13.tgz","integrity": "sha512-6kXByGkvRvwXLuyaWzsebs2du6+XuAB2CuMsuzP7uaihQahshVgSmB22Pmh0vQMkQ1h5+PZU0d+Di1o+WpVWJg==","license": "MIT","dependencies": {"@types/node": "*","pg-protocol": "*","pg-types": "^4.0.1"}
}},"node_modules/dotenv": {"version": "16.5.0","resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.5.0.tgz","integrity": "sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==","dev": true,"license": "BSD-2-Clause","engines": {"node": ">=12"},"funding": {"url": "https://dotenvx.com"}},"node_modules/drizzle-kit": {"version": "0.31.0","resolved": "https://registry.npmjs.org/drizzle-kit/-/drizzle-kit-0.31.0.tgz","integrity": "sha512-pcKVT+GbfPA+bUovPIilgVOoq+onNBo/YQBG86sf3/GFHkN6lRJPm1l7dKN0IMAk57RQoIm4GUllRrasLlcaSg==","dev": true,"license": "MIT","dependencies": {"@drizzle-team/brocli": "^0.10.2","@esbuild-kit/esm-loader": "^2.5.5","esbuild": "^0.25.2","esbuild-register": "^3.5.0"},"bin": {"drizzle-kit": "bin.cjs"}},"node_modules/drizzle-orm": {"version": "0.42.0","resolved": "https://registry.npmjs.org/drizzle-orm/-/drizzle-orm-0.42.0.tgz","integrity": "sha512-pS8nNJm2kBNZwrOjTHJfdKkaU+KuUQmV/vk5D57NojDq4FG+0uAYGMulXtYT///HfgsMF0hnFFvu1ezI3OwOkg==","license": "Apache-2.0","peerDependencies": {"@aws-sdk/client-rds-data": ">=3","@cloudflare/workers-types": ">=4","@electric-sql/pglite": ">=0.2.0","@libsql/client": ">=0.10.0","@libsql/client-wasm": ">=0.10.0","@neondatabase/serverless": ">=0.10.0","@op-engineering/op-sqlite": ">=2","@opentelemetry/api": "^1.4.1","@planetscale/database": ">=1.13","@prisma/client": "*","@tidbcloud/serverless": "*","@types/better-sqlite3": "*","@types/pg": "*","@types/sql.js": "*","@vercel/postgres": ">=0.8.0","@xata.io/client": "*","better-sqlite3": ">=7","bun-types": "*","expo-sqlite": ">=14.0.0","gel": ">=2","knex": "*","kysely": "*","mysql2": ">=2","pg": ">=8","postgres": ">=3","sql.js": ">=1","sqlite3": ">=5"},"peerDependenciesMeta": {"@aws-sdk/client-rds-data": {"optional": true},"@cloudflare/workers-types": {"optional": true},"@electric-sql/pglite": {"optional": true},"@libsql/client": {"optional": true},"@libsql/client-wasm": {"optional": true},"@neondatabase/serverless": {"optional": true},"@op-engineering/op-sqlite": {"optional": true},"@opentelemetry/api": {"optional": true},"@planetscale/database": {"optional": true},"@prisma/client": {"optional": true},"@tidbcloud/serverless": {"optional": true},"@types/better-sqlite3": {"optional": true},"@types/pg": {"optional": true},"@types/sql.js": {"optional": true},"@vercel/postgres": {"optional": true},"@xata.io/client": {"optional": true},"better-sqlite3": {"optional": true},"bun-types": {"optional": true},"expo-sqlite": {"optional": true},"gel": {"optional": true},"knex": {"optional": true},"kysely": {"optional": true},"mysql2": {"optional": true},"pg": {"optional": true},"postgres": {"optional": true},"prisma": {"optional": true},"sql.js": {"optional": true},"sqlite3": {"optional": true}
}},"node_modules/esbuild-register": {"version": "3.6.0","resolved": "https://registry.npmjs.org/esbuild-register/-/esbuild-register-3.6.0.tgz","integrity": "sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==","dev": true,"license": "MIT","dependencies": {"debug": "^4.3.4"},"peerDependencies": {"esbuild": ">=0.12 <1"
}},"node_modules/get-tsconfig": {"version": "4.10.0","resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.0.tgz","integrity": "sha512-kGzZ3LWWQcGIAmg6iWvXn0ei6WDtV26wzHRMwDSzmAbcXrTEXxHy6IehI6/4eT6VRKyMP1eF1VqwrVUmE/LR7A==","dev": true,"license": "MIT","dependencies": {"resolve-pkg-maps": "^1.0.0"},"funding": {"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
},"node_modules/pg-int8": {"version": "1.0.1","resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz","integrity": "sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==","license": "ISC","engines": {"node": ">=4.0.0"}
"node_modules/pg-numeric": {"version": "1.0.2","resolved": "https://registry.npmjs.org/pg-numeric/-/pg-numeric-1.0.2.tgz","integrity": "sha512-BM/Thnrw5jm2kKLE5uJkXqqExRUY/toLHda65XgFTBTFYZyopbKjBe29Ii3RbkvlsMoFwD+tHeGaCjjv0gHlyw==","license": "ISC","engines": {"node": ">=4"}},"node_modules/pg-protocol": {"version": "1.8.0","resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.8.0.tgz","integrity": "sha512-jvuYlEkL03NRvOoyoRktBK7+qU5kOvlAwvmrH8sr3wbLrOdVWsRxQfz8mMy9sZFsqJ1hEWNfdWKI4SAmoL+j7g==","license": "MIT"},"node_modules/pg-types": {"version": "4.0.2","resolved": "https://registry.npmjs.org/pg-types/-/pg-types-4.0.2.tgz","integrity": "sha512-cRL3JpS3lKMGsKaWndugWQoLOCoP+Cic8oseVcbr0qhPzYD5DWXK+RZ9LY9wxRf7RQia4SCwQlXk0q6FCPrVng==","license": "MIT","dependencies": {"pg-int8": "1.0.1","pg-numeric": "1.0.2","postgres-array": "~3.0.1","postgres-bytea": "~3.0.0","postgres-date": "~2.1.0","postgres-interval": "^3.0.0","postgres-range": "^1.1.1"},"engines": {"node": ">=10"}},
"node_modules/postgres": {"version": "3.4.5","resolved": "https://registry.npmjs.org/postgres/-/postgres-3.4.5.tgz","integrity": "sha512-cDWgoah1Gez9rN3H4165peY9qfpEo+SA61oQv65O3cRUE1pOEoJWwddwcqKE8XZYjbblOJlYDlLV4h67HrEVDg==","devOptional": true,"license": "Unlicense","engines": {"node": ">=12"},"funding": {"type": "individual","url": "https://github.com/sponsors/porsager"}},"node_modules/postgres-array": {"version": "3.0.4","resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-3.0.4.tgz","integrity": "sha512-nAUSGfSDGOaOAEGwqsRY27GPOea7CNipJPOA7lPbdEpx5Kg3qzdP0AaWC5MlhTWV9s4hFX39nomVZ+C4tnGOJQ==","license": "MIT","engines": {"node": ">=12"}},"node_modules/postgres-bytea": {"version": "3.0.0","resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-3.0.0.tgz","integrity": "sha512-CNd4jim9RFPkObHSjVHlVrxoVQXz7quwNFpz7RY1okNNme49+sVyiTvTRobiLV548Hx/hb1BG+iE7h9493WzFw==","license": "MIT","dependencies": {"obuf": "~1.1.2"},"engines": {"node": ">= 6"}},"node_modules/postgres-date": {"version": "2.1.0","resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-2.1.0.tgz","integrity": "sha512-K7Juri8gtgXVcDfZttFKVmhglp7epKb1K4pgrkLxehjqkrgPhfG6OO8LHLkfaqkbpjNRnra018XwAr1yQFWGcA==","license": "MIT","engines": {"node": ">=12"}},"node_modules/postgres-interval": {"version": "3.0.0","resolved": "https://registry.npmjs.org/postgres-interval/-/postgres-interval-3.0.0.tgz","integrity": "sha512-BSNDnbyZCXSxgA+1f5UU2GmwhoI0aU5yMxRGO8CdFEcY2BQF9xm/7MqKnYoM1nJDk8nONNWDk9WeSmePFhQdlw==","license": "MIT","engines": {"node": ">=12"}},"node_modules/postgres-range": {"version": "1.1.4","resolved": "https://registry.npmjs.org/postgres-range/-/postgres-range-1.1.4.tgz","integrity": "sha512-i/hbxIE9803Alj/6ytL7UHQxRvZkI9O4Sy+J3HGc4F4oo/2eQAjTSNJ0bfxyse3bH0nuVesCk+3IRLaMtG3H6w==","license": "MIT"},
}},"node_modules/resolve-pkg-maps": {"version": "1.0.0","resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz","integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==","dev": true,"license": "MIT","funding": {"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
"node_modules/source-map-support": {"version": "0.5.21","resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz","integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==","dev": true,"license": "MIT","dependencies": {"buffer-from": "^1.0.0","source-map": "^0.6.0"}},
"node_modules/tsx": {"version": "4.19.3","resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.3.tgz","integrity": "sha512-4H8vUNGNjQ4V2EOoGw005+c+dGuPSnhpPBPHBtsZdGZBk/iJb4kguGlPWaZTZ3q5nMtFOEsY0nRDlh9PJyd6SQ==","dev": true,"license": "MIT","dependencies": {"esbuild": "~0.25.0","get-tsconfig": "^4.7.5"},"bin": {"tsx": "dist/cli.mjs"},"engines": {"node": ">=18.0.0"},"optionalDependencies": {"fsevents": "~2.3.3"}},
# Getting data into db

## pomona and 247 files
## pomona labels, avianz, 2023, 2025

Round-trip the eBird taxonomy through DuckDB to get it to import into Postgres (DuckDB rewrites the TSV with consistent quoting and escaping):

```sql
CREATE TABLE imported_data AS
SELECT * FROM read_csv_auto('eBird_taxonomy_v2024.tsv',
    delim='\t',
    header=true,
    auto_detect=true);

COPY imported_data TO 'eBird_taxonomy_v2024.tsv' (
    FORMAT 'csv',
    DELIMITER '\t',
    HEADER true,
    QUOTE '"',   -- Use double quotes for quoting
    ESCAPE '\',  -- Use backslash as escape character
    NULL ''      -- How NULL values should appear
);

\COPY ebird_taxonomy FROM 'eBird_taxonomy_v2024.tsv' WITH (FORMAT csv, DELIMITER E'\t', HEADER, QUOTE '"', ESCAPE '\', NULL '');

CREATE SEQUENCE twenty_four_seven_sequence START 1;

CREATE TABLE twenty_four_seven_files_xxh64 (
    id INTEGER DEFAULT nextval('twenty_four_seven_sequence'),
    drive VARCHAR,
    location VARCHAR,
    trip_date DATE,
    file VARCHAR,
    latitude DOUBLE,
    longitude DOUBLE,
    start_rec_period TIMESTAMP WITH TIME ZONE,
    end_rec_period TIMESTAMP WITH TIME ZONE,
    duration_s DOUBLE,
    sample_rate INTEGER,
    utc_date_time TIMESTAMP WITH TIME ZONE,
    local_date_time TIMESTAMP WITH TIME ZONE,
    moth_id VARCHAR,
    gain VARCHAR,
    battery_v DOUBLE,
    temp_c DOUBLE,
    sha2_256 VARCHAR,
    night BOOLEAN,
    xxh64 VARCHAR
);

COPY twenty_four_seven_files_xxh64 (drive, location, trip_date, file, latitude, longitude,
    start_rec_period, end_rec_period, duration_s, sample_rate,
    utc_date_time, local_date_time, moth_id, gain, battery_v,
    temp_c, sha2_256, night, xxh64)
FROM '/Volumes/SSD1/DB_Files_with_xxh/twenty_four_seven_files_20250228_with_xxh64.csv';

COPY (
    SELECT
        location,
        list(DISTINCT latitude) AS latitude_list,
        list(DISTINCT longitude) AS longitude_list
    FROM twenty_four_seven_files_xxh64
    GROUP BY location
) TO 'location_twenty_four_seven.csv' (HEADER, DELIMITER '\t');
```

### now julia

## location table

```julia
df = CSV.read("location_twenty_four_seven.csv", DataFrame)
# do stuff
### made a mistake here, mixed up dataset_id and location_id
### i have dataset_id in field file.location_id
### see fix below
CSV.write("location.tsv", df,
    delim = '\t',        # Tab delimiter
    header = true,       # Include header (default)
    quotechar = '"',     # Character used for quoting
    missingstring = "",  # How missing values are written
    append = false       # Whether to append to an existing file
)
```

## clusters

```sql
COPY (SELECT location, trip_date
      FROM pomona_files_xxh64
      GROUP BY location, trip_date) TO 'cluster_pomona.csv' (HEADER, DELIMITER '\t');
```

Edit in easycsv; add columns and headers.

```julia
location = CSV.read("location.tsv", DataFrame)
cluster = CSV.read("cluster_pomona.csv", DataFrame)
# do stuff to populate cluster
c = select(cluster, Not(:location_id))
l = select(location, :id => :location_id, :dataset_id, :name)
n = leftjoin(c, l, on = [:dataset_id, :name])
select!(n, :id, :dataset_id, :location_id, :name, :description,
        :created_by, :created_at, :last_modified, :modified_by, :active, :timezone_id)

# merge name and description (trip_date)
x = []
for row in eachrow(n)
    y = row.name * " " * string(row.description)
    push!(x, y)
end
n.name = x
n.description .= missing

CSV.write("cluster.tsv", n,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = true)
```
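The merge loop above can also be written as a single broadcast; a minimal equivalent sketch (same assumption: `description` still holds the trip_date values at that point):

```julia
# equivalent to the for-loop above: concatenate name and description row-wise
n.name = n.name .* " " .* string.(n.description)
n.description .= missing
```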
Look at the 2022-03-23 cluster later; mutate it to the subsequent cluster, only N14 was serviced.

Note: needed to add timezone_id in duckdb, yet to add it to neon:

```sql
ALTER TABLE cluster
ADD COLUMN timezone_id VARCHAR(40); -- NOT NULL constraint fails in duckdb

UPDATE cluster
SET timezone_id = 'Pacific/Auckland'; -- empty anyway, already changed tsv
```

Can't easily drop and recreate the table because cyclic_recording_pattern depends on it.

## cyclic_recording_pattern

Work back from file, todo.

## files

Copy pomona files into duckdb; copy 2500kHz & 24/7 files into duckdb:

```sql
COPY pomona_files_xxh64 TO 'pomona_files.csv' (HEADER, DELIMITER '\t');
COPY twenty_four_seven_files_xxh64 TO 'twenty_four_seven_files.csv' (HEADER, DELIMITER '\t');
```

Added the new columns needed in easycsv, later loaded both into dataframes:

```julia
pdf = CSV.read("pomona_files.csv", DataFrame)
tdf = CSV.read("twenty_four_seven_files.csv", DataFrame)

pdf.location_id = ["Yx0oNUDmP5ch" for x in 1:length(pdf.location_id)]
tdf.location_id = ["gljgxDbfasva" for x in 1:length(tdf.location_id)]
pdf.description = pdf.location .* " " .* string.(pdf.trip_date)
tdf.description = tdf.location .* " " .* string.(tdf.trip_date)
pdf.storage_prefix = [nanoid(3) for x in 1:length(pdf.storage_prefix)]
tdf.storage_prefix = [nanoid(3) for x in 1:length(tdf.storage_prefix)]
pdf.upload = [true for x in 1:length(pdf.upload)]
tdf.upload = [true for x in 1:length(tdf.upload)]
pdf.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.created_by)]
tdf.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(tdf.created_by)]
pdf.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.modified_by)]
tdf.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(tdf.modified_by)]
w = now(tz"UTC")
pdf.created_at = [w for x in 1:length(pdf.created_at)]
tdf.created_at = [w for x in 1:length(tdf.created_at)]
pdf.last_modified = [w for x in 1:length(pdf.last_modified)]
tdf.last_modified = [w for x in 1:length(tdf.last_modified)]
pdf.active = [true for x in 1:length(pdf.active)]
tdf.active = [true for x in 1:length(tdf.active)]
pdf.dataset_id = ["Yx0oNUDmP5ch" for x in 1:length(pdf.dataset_id)]
tdf.dataset_id = ["gljgxDbfasva" for x in 1:length(tdf.dataset_id)]
pdf.record_s = [895 for x in 1:length(pdf.record_s)]
tdf.record_s = [60 for x in 1:length(tdf.record_s)]
rename!(pdf, :Column28 => :sleep_s)
rename!(tdf, :Column28 => :sleep_s)
pdf.sleep_s = [5 for x in 1:length(pdf.sleep_s)] # not needed
tdf.sleep_s = ???????????????????????????????????????????????????????????????? # not needed
```
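The repeated comprehensions above fill whole columns with a constant; `fill` plus `nrow` is an equivalent, shorter spelling (a sketch with the same values as above):

```julia
# same effect as the comprehension-based assignments above
pdf.location_id = fill("Yx0oNUDmP5ch", nrow(pdf))
pdf.upload      = fill(true, nrow(pdf))
pdf.created_at  = fill(now(tz"UTC"), nrow(pdf))
```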
```julia
CSV.write("pomona_files2.tsv", pdf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
CSV.write("twenty_four_seven_files2.tsv", tdf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

rename!(pdf, :timestamp_utc => :timestamp)
rename!(tdf, :timestamp_utc => :timestamp)

CSV.write("pomona_files3.tsv", pdf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
CSV.write("twenty_four_seven_files3.tsv", tdf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
```

Check the time format in the moth metadata and in my code:

```julia
using Dates, TimeZones, JSON3

function is_solar_night(timestamp::String, sunrise::String, sunset::String)::Bool
    ts = ZonedDateTime(timestamp)
    sr = ZonedDateTime(sunrise)
    ss = ZonedDateTime(sunset)
    # Same test either way, including when sunrise is after sunset (e.g. high latitudes)
    return ts > ss || ts < sr
end

function is_civil_night(timestamp::String, dawn::String, dusk::String)::Bool
    ts = ZonedDateTime(timestamp)
    dn = ZonedDateTime(dawn)
    ds = ZonedDateTime(dusk)
    return ts > ds || ts < dn
end
```
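A quick sanity check of these helpers; the timestamps below are fabricated, in the same ISO-8601 shape as the suncalc-cli output used later:

```julia
# fabricated example values, not from the dataset
ts      = "2025-03-22T21:02:34.540+00:00"
sunset  = "2025-03-22T06:30:00.000+00:00"  # NZ evening, expressed in UTC
sunrise = "2025-03-22T17:45:00.000+00:00"  # NZ morning, expressed in UTC

is_solar_night(ts, sunrise, sunset)  # true: 21:02 UTC falls after sunset
```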
tdf[i, :night], " ", solar, civil, moon)endtdf.maybe_solar_night = stdf.maybe_civil_night = ctdf.moon_phase = mCSV.write("twenty_four_seven_files4.tsv", tdf,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How missing values are writtenappend = false # Whether to append to an existing file)pdf.id = [nanoid() for x in 1:length(pdf.id)]tdf.id = [nanoid() for x in 1:length(tdf.id)]# for aws s3# nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")pdf.storage_prefix = [(nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890") * "/" * nanoid(; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")) for x in 1:length(pdf.storage_prefix)]tdf.storage_prefix = [(nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890") * "/" * nanoid(; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")) for x in 1:length(tdf.storage_prefix)]CSV.write("pomona_files5.tsv", pdf,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How missing values are writtenappend = false # Whether to append to an existing file)filter(x -> x.description != "MC2 2025-02-01", tdf)CSV.write("twenty_four_seven_files5.tsv", tdf,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How missing values are writtenappend = false # Whether to append to an existing file)####tidy up files, deleted MC2 ####################################### cluster_id# join on p/t file.description , cluster.name to get cluster_idcdf=CSV.read("cluster.tsv", DataFrame)select!(cdf, :id=> "cluster_id", :name => "description", :dataset_id )select!(pdf, Not([:cluster_id]))select!(tdf, Not([:cluster_id]))j_pdf = leftjoin(pdf, cdf, on = [:description, :dataset_id])j_tdf = leftjoin(tdf, cdf, on = [:description, :dataset_id])CSV.write("pomona_files6.tsv", j_pdf,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How missing values are writtenappend = false # Whether to append to an existing file)CSV.write("twenty_four_seven_files6.tsv", j_tdf,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How missing values are writtenappend = false # Whether to append to an existing file)# filter out my junk/mistakes 7 0's, 50 with duration not integer, see file_deleted.csvfilter!(x -> x.duration > 0, j_pdf) #deleted 7 with 0 duration after checking pomona_files in duckdb, also 1 GR6 unconfirmedfilter!(x -> (x.duration - trunc(x.duration)) == 0, j_pdf) #deleted 50 with duration not integer, see file_deleted.csvfilter!(x -> x.duration > 0, j_tdf) #deleted 7 with 0 duration after checking pomona_files in duckdb, also 1 GR6 unconfirmedfilter!(x -> (x.duration - trunc(x.duration)) == 0, j_tdf) #deleted 50 with duration not integer, see file_deleted.csv# should have fixed timestamp_local here, see fix below insteadCSV.write("pomona_files7.tsv", j_pdf,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How missing values are writtenappend = false # Whether to append to an existing file)CSV.write("twenty_four_seven_files7.tsv", j_tdf,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How 
# file table

```julia
file_a = select(j_pdf, :id, :file_name, :path, :xxh64 => "xxh64_hash", :location_id,
    :timestamp_local, :cluster_id, :duration, :sample_rate, :description, :storage_prefix,
    :upload, :maybe_solar_night, :maybe_civil_night, :moon_phase,
    :created_by, :created_at, :last_modified, :modified_by, :active)
file_b = select(j_tdf, :id, :file_name, :path, :xxh64 => "xxh64_hash", :location_id,
    :timestamp_local, :cluster_id, :duration, :sample_rate, :description, :storage_prefix,
    :upload, :maybe_solar_night, :maybe_civil_night, :moon_phase,
    :created_by, :created_at, :last_modified, :modified_by, :active)
file = vcat(file_a, file_b)
CSV.write("file.tsv", file,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
```

```
skraak_db=> \copy file FROM 'file.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
ERROR:  insert or update on table "file" violates foreign key constraint "file_location_id_fkey"
DETAIL:  Key (location_id)=(Yx0oNUDmP5ch) is not present in table "location"
```

Maybe I need to check timestamp_local and file_name before adding the data below.

### i have dataset_id in field file.location_id
### fix here
### join location table and put location_id in the correct field

```julia
fdf = CSV.read("file.tsv", DataFrame)
rename!(fdf, :location_id => "dataset_id")
fdf.name = fdf.description |> x -> split.(x, " ") |> x -> first.(x)
ldf = CSV.read("location.tsv", DataFrame)
select!(ldf, :id => "location_id", :dataset_id, :name)
# join
file = leftjoin(fdf, ldf, on = [:dataset_id, :name])
select!(file, :id, :file_name, :path, :xxh64_hash, :location_id, :timestamp_local,
    :cluster_id, :duration, :sample_rate, :description, :storage_prefix, :upload,
    :maybe_solar_night, :maybe_civil_night, :moon_phase,
    :created_by, :created_at, :last_modified, :modified_by, :active)
CSV.write("file2.tsv", file,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

# make sure timestamp_local is accurate, should have done this earlier
# already have a file df loaded as file
md = CSV.read("moth_metadata.tsv", DataFrame)
select!(md, :file_id => "id", :timestamp)
jdf = innerjoin(file, md, on = :id)
x = jdf.timestamp |>
    x -> String.(x) |>
    x -> ZonedDateTime.(x, "yyyy-mm-dd HH:MM:SSzzzz") |>
    x -> astimezone.(x, tz"Pacific/Auckland")
jdf.timestamp_local = x
select!(jdf, Not([:timestamp]))
names(jdf) == names(file)
CSV.write("file3.tsv", jdf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
```

# moth_metadata table

```julia
file_a = select(j_pdf, :id => "file_id", :timestamp, :recorder_id, :gain, :battery_v, :temp_c,
    :created_at, :created_by, :last_modified, :modified_by)
file_b = select(j_tdf, :id => "file_id", :timestamp, :recorder_id, :gain, :battery_v, :temp_c,
    :created_at, :created_by, :last_modified, :modified_by)
file = vcat(file_a, file_b)
CSV.write("moth_metadata.tsv", file,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
```
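A quick spot check of the timezone conversion used for timestamp_local above (the sample timestamp is fabricated):

```julia
using Dates, TimeZones

z = ZonedDateTime("2025-02-28 21:02:34+00:00", "yyyy-mm-dd HH:MM:SSzzzz")
astimezone(z, tz"Pacific/Auckland")  # 2025-03-01T10:02:34+13:00 during NZDT
```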
# file_dataset table

```julia
file_a = select(j_pdf, :id => "file_id", :dataset_id, :created_at, :created_by, :last_modified, :modified_by)
file_b = select(j_tdf, :id => "file_id", :dataset_id, :created_at, :created_by, :last_modified, :modified_by)
file = vcat(file_a, file_b)
CSV.write("file_dataset.tsv", file,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
```

The species and call_type tables were constructed in easycsv as they are very small.

# selections

selection needs: :id, :file_id, :dataset_id, :start_time, :end_time, :freq_low, :freq_high, :description, :storage_prefix, :upload, :approved, :is_solar_night, :is_civil_night, :moon_phase, :created_by, :created_at, :last_modified, :modified_by, :active

selection_metadata needs: :selection_id, :json, :created_at, :created_by, :last_modified, :modified_by

label needs: :id, :selection_id, :species_id, :filter, :certainty, :created_at, :created_by, :last_modified, :modified_by, :active

label_subtype needs: :id, :label_id, :calltype_id, :filter, :certainty, :created_at, :created_by, :last_modified, :modified_by, :active

# pomona_labels_avianz

```sql
COPY pomona_labels_avianz TO 'pomona_labels_avianz.tsv' (
    FORMAT 'csv',
    DELIMITER '\t',
    HEADER true -- leave all the other options out or it mis-formats noise_type
);
```

Add columns one by one:

```julia
adf = CSV.read("pomona_labels_avianz.tsv", DataFrame)
# NB: need dataset_id for the join on file
adf.dataset_id = ["Yx0oNUDmP5ch" for x in 1:nrow(adf)] # all pomona
adf.id = [nanoid() for x in 1:length(adf.file)]

# file_id: join on the file table
file = CSV.read("file3.tsv", DataFrame)
select!(file, :id, :file_name, :description)
file.location = split.(file.description, " ") |> x -> first.(x)
select!(file, Not([:description]))
rename!(file, :id => :file_id, :file_name => :file)
file_dataset = CSV.read("file_dataset.tsv", DataFrame)
select!(file_dataset, :file_id, :dataset_id)
file = leftjoin(file, file_dataset, on = :file_id)

# NB: join on all 3 below; do not leave out dataset_id, there are duplicates across datasets
adf = leftjoin(adf, file, on = [:location, :file, :dataset_id])
select!(adf, :location, :file, :box, :k_set, :duet, :k_close, :k_ok, :k_far, :k_m, :k_f,
    :k_mf, :k_umf, :other_label, :noise_level, :noise_type, :id, :file_id, :dataset_id)

findall(ismissing, adf.file_id) # 8374-8396: leftjoin failed due to missings, saved them to a csv
deleteat!(adf, 8374:8396)
# saved to csv

adf.box = map(x -> eval(Meta.parse(x)), adf.box)
adf.start_time = getindex.(adf.box, 1) |> x -> ceil.(x, digits=3)
adf.end_time = getindex.(adf.box, 2) |> x -> floor.(x, digits=3)
adf.freq_low = getindex.(adf.box, 3) |> x -> ceil.(x, digits=3)
adf.freq_high = getindex.(adf.box, 4) |> x -> floor.(x, digits=3)
# saved to csv
```
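The AviaNZ `box` column arrives as a string, and `Meta.parse` plus `eval` turns it back into a `[start_s, end_s, freq_low, freq_high]` vector. A fabricated example (note `eval` trusts the input, which is fine for a one-off import):

```julia
box = eval(Meta.parse("[12.3456, 45.6789, 600.0, 7000.0]"))  # fabricated box string
ceil(box[1], digits=3), floor(box[2], digits=3)  # (12.346, 45.678) -> start_time, end_time
```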
```julia
x = []
for row in eachrow(adf)
    y = row.location * " " * row.file
    push!(x, y)
end
adf.description = x
# saved to csv

adf.storage_prefix = [(nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890") * "/" *
                       nanoid(; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")) for x in 1:length(adf.file)]
# saved to csv

adf.upload = [true for x in 1:length(adf.file)]
adf.approved = [true for x in 1:length(adf.file)]
# saved to csv

# need to join timestamp from the file table; have file_id so can join on that
file_time = CSV.read("moth_metadata.tsv", DataFrame)
select!(file_time, :file_id, :timestamp)
adf = leftjoin(adf, file_time, on = :file_id)
findall(ismissing, adf.timestamp) # Int64[]

# need latitude and longitude from file - location
file_location = CSV.read("file3.tsv", DataFrame)
select!(file_location, :id => "file_id", :location_id)
location = CSV.read("location.tsv", DataFrame)
select!(location, :id => "location_id", :latitude, :longitude)
ldf = leftjoin(file_location, location, on = :location_id)
select!(ldf, Not([:location_id]))
adf = leftjoin(adf, ldf, on = :file_id)
# saved to csv
```

Using Dates, TimeZones, JSON3 and the functions is_solar_night and is_civil_night from above:

```julia
s = []
c = []
m = []
for i in 1:nrow(adf)
    t = ZonedDateTime(String(adf[i, :timestamp]), "yyyy-mm-dd HH:MM:SSzzzz") |>
        x -> astimezone(x, tz"UTC") |>
        x -> Dates.format(x, "yyyy-mm-ddTHH:MM:SSzzzz")
    # suncalc-cli 2025-03-22T21:02:34.540+00:00 -45.50603 167.48374
    cmd = `suncalc-cli $t $(adf[i, :latitude]) $(adf[i, :longitude])`
    json = read(cmd, String) |> x -> JSON3.read(x)
    solar = is_solar_night(t, json.sunTimes.sunrise, json.sunTimes.sunset)
    civil = is_civil_night(t, json.sunTimes.dawn, json.sunTimes.dusk)
    moon = json.moon.phase |> x -> round(x, digits=2)
    push!(s, solar)
    push!(c, civil)
    push!(m, moon)
    println(i, " ", solar, civil, moon)
end
adf.is_solar_night = s
adf.is_civil_night = c
adf.moon_phase = m
# saved to csv

adf.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(adf.file)]
adf.created_at = ["2025-03-31T13:14:50.083+00:00" for x in 1:length(adf.file)]
adf.last_modified = ["2025-03-31T13:14:50.083+00:00" for x in 1:length(adf.file)]
adf.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(adf.file)]
adf.active = [true for x in 1:length(adf.file)]
# saved to csv

selection = select(adf, :id, :file_id, :dataset_id, :start_time, :end_time, :freq_low, :freq_high,
    :description, :storage_prefix, :upload, :approved, :is_solar_night, :is_civil_night,
    :moon_phase, :created_by, :created_at, :last_modified, :modified_by, :active)
CSV.write("selection_avianz.tsv", selection,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

adf.selection_metadata = ["" for x in 1:nrow(adf)]
adf.selection_metadata[adf.k_close .== true] .= """{"distance":"close"}"""
adf.selection_metadata[adf.k_ok .== true] .= """{"distance":"ok"}"""
adf.selection_metadata[adf.k_far .== true] .= """{"distance":"far"}"""
selection_metadata = filter(row -> !ismissing(row.selection_metadata), adf) |>
    x -> filter(row -> row.selection_metadata != "", x) |>
    x -> select(x, :id => "selection_id", :selection_metadata => "json",
                :created_at, :created_by, :last_modified, :modified_by)
CSV.write("selection_metadata_avianz.tsv", selection_metadata,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

using JSON3
adf.file_metadata = ["" for x in 1:nrow(adf)]
for row in eachrow(adf)
    dict = Dict()
    !ismissing(row.noise_level) && (dict["noiseLevel"] = row.noise_level)
    !ismissing(row.noise_type) && (dict["noiseType"] = row.noise_type)
    json_string = JSON3.write(dict)
    row.file_metadata = json_string
    # println(json_string)
end
# saved adf to csv
```
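A round-trip check of the metadata JSON built in that loop (noise values fabricated):

```julia
using JSON3

dict = Dict("noiseLevel" => "low", "noiseType" => "wind")  # fabricated values
s = JSON3.write(dict)
JSON3.read(s).noiseLevel  # "low"
```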
```julia
CSV.write("pomona_labels_avianz.tsv", adf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

file_metadata = filter(row -> !ismissing(row.file_metadata), adf) |>
    x -> filter(row -> row.file_metadata != "", x) |>
    x -> select(x, :file_id, :file_metadata => "json",
                :created_at, :created_by, :last_modified, :modified_by) |>
    x -> groupby(x, :file_id) |>
    x -> combine(x, first)
CSV.write("file_metadata_avianz.tsv", file_metadata,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
```

Need: :species_id, :filter, :certainty (label); :calltype_id, :filter, :certainty (label_subtype).

Ignore k_set; amalgamate m+mf+umf; amalgamate f+mf; take duet.

```julia
l_arr = []
lst_arr = []
for row in eachrow(adf)
    l_id = nanoid()
    if row.k_set # not using k_mf or k_umf
        push!(l_arr, (id=l_id, selection_id=row.id, species_id="ANhV8iZPfIh8")) # Haast Kiwi
        row.k_m && push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id="dD9cLYtq_G5a")) # M
        row.k_f && push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id="DJ4cFnVHc7NM")) # F
        if row.duet # so it has the correct label_id
            push!(lst_arr, (id=nanoid(), label_id=l_id, calltype_id="dD9cLYtq_G5a")) # D
        end
    end
    # it's either k_set or other_label, never both at the same time
    occursin("Morepork", coalesce(row.other_label, "")) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="OA9X68Mrv4v8"))
    occursin("Kaka (Sth Is)", coalesce(row.other_label, "")) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="MMOLwZw9fVSn"))
    occursin("Don't Know", coalesce(row.other_label, "")) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="e-bBi7tasfya"))
    occursin("Cuckoo (Long-tailed)", coalesce(row.other_label, "")) && push!(l_arr, (id=l_id, selection_id=row.id, species_id="rqIcB4EkSy2G"))
end

label = DataFrame(l_arr)
label.filter_id = ["6C-d1OtNF3GJ" for x in 1:nrow(label)]
label.certainty = [100 for x in 1:nrow(label)]
label.created_at = ["2025-04-01T07:04:07.167+00:00" for x in 1:nrow(label)]
label.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]
label.last_modified = ["2025-04-01T07:04:07.167+00:00" for x in 1:nrow(label)]
label.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]
label.active = [true for x in 1:nrow(label)]
CSV.write("label_avianz.tsv", label,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

label_subtype = DataFrame(lst_arr)
label_subtype.filter_id = ["E721dNTaHio7" for x in 1:nrow(label_subtype)]
label_subtype.certainty = [100 for x in 1:nrow(label_subtype)]
label_subtype.created_at = ["2025-04-01T07:04:07.167+00:00" for x in 1:nrow(label_subtype)]
label_subtype.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]
label_subtype.last_modified = ["2025-04-01T07:04:07.167+00:00" for x in 1:nrow(label_subtype)]
label_subtype.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]
label_subtype.active = [true for x in 1:nrow(label_subtype)]
CSV.write("label_subtype_avianz.tsv", label_subtype,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
```

# species-dataset table

Just started with the species table and edited it in easycsv.
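To make the fan-out in the label loop above concrete: one fabricated row with k_set, k_m, and duet set yields one label row and two label_subtype rows sharing its label_id:

```julia
using Nanoid

row = (id = "SELECTIONID01", k_set = true, k_m = true, k_f = false, duet = true)  # fabricated row
l_id = nanoid()
l_arr = [(id = l_id, selection_id = row.id, species_id = "ANhV8iZPfIh8")]      # Haast Kiwi
lst_arr = [(id = nanoid(), label_id = l_id, calltype_id = "dD9cLYtq_G5a"),     # M
           (id = nanoid(), label_id = l_id, calltype_id = "dD9cLYtq_G5a")]     # duet
length(l_arr), length(lst_arr)  # (1, 2)
```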
COPY pomona_labels_20230418 TO 'pomona_labels_20230418.tsv' (
    FORMAT 'csv',
    DELIMITER '\t',
    HEADER true,
    QUOTE '"',   -- Use double quotes for quoting
    ESCAPE '\',  -- Use backslash as escape character
    NULL ''      -- How NULL values should appear
);

# selection needs:
#   :id, :file_id, :dataset_id, :start_time, :end_time, :freq_low, :freq_high, :description,
#   :storage_prefix, :upload, :approved, :is_solar_night, :is_civil_night, :moon_phase,
#   :created_by, :created_at, :last_modified, :modified_by, :active
# label needs:
#   :id, :selection_id, :species_id, :filter, :certainty, :created_at, :created_by, :last_modified, :modified_by, :active
# label_subtype needs:
#   :id, :label_id, :calltype_id, :filter, :certainty, :created_at, :created_by, :last_modified, :modified_by, :active

using DataFrames, CSV, Dates, TimeZones, Nanoid
pdf = CSV.read("pomona_labels_20230418.tsv", DataFrame)
pdf.dataset_id = ["Yx0oNUDmP5ch" for x in 1:nrow(pdf)] # all pomona
pdf.id = [nanoid() for x in 1:length(pdf.file)]

# file_id, join on file table
file = CSV.read("file3.tsv", DataFrame)
select!(file, :id, :file_name, :description)
file.location = split.(file.description, " ") |> x -> first.(x)
select!(file, Not([:description]))
rename!(file, :id => :file_id, :file_name => :file)
file_dataset = CSV.read("file_dataset.tsv", DataFrame)
select!(file_dataset, :file_id, :dataset_id)
file = leftjoin(file, file_dataset, on = :file_id)
pdf = leftjoin(pdf, file, on = [:location, :file, :dataset_id])
select!(pdf, :location, :file, :box, :male, :female, :duet, :not_kiwi, :other_label,
    :close_call, :ok_call, :far_call, :low_noise, :medium_noise, :high_noise, :terrible_noise,
    :id, :file_id, :dataset_id)
findall(ismissing, pdf.file_id) # 69688:69691
# 69688 │ C05 20211124_033000.WAV
# 69689 │ M02 20211124_214500.WAV
# 69690 │ C05 20230215_030000.WAV
# 69691 │ C05 20230215_030000.WAV
deleteat!(pdf, 69688:69691)
CSV.write("pomona_labels_20230418.tsv", pdf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

pdf.box = map(x -> eval(Meta.parse(x)), pdf.box) # parse box strings into arrays
pdf.start_time = getindex.(pdf.box, 1) |> x -> ceil.(x, digits = 3)
pdf.end_time = getindex.(pdf.box, 2) |> x -> floor.(x, digits = 3)
pdf.freq_low = [missing for x in 1:nrow(pdf)]
pdf.freq_high = [missing for x in 1:nrow(pdf)]
x = []
for row in eachrow(pdf)
    y = row.location * " " * row.file
    push!(x, y)
end
pdf.description = x
pdf.storage_prefix = [(nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890") * "/" *
                       nanoid(; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")) for x in 1:length(pdf.file)]
pdf.upload = [true for x in 1:length(pdf.file)]
pdf.approved = [true for x in 1:length(pdf.file)]

# need to join timestamp from file table, have file_id so can join on that
file_time = CSV.read("moth_metadata.tsv", DataFrame)
select!(file_time, :file_id, :timestamp)
pdf = leftjoin(pdf, file_time, on = :file_id)
findall(ismissing, pdf.timestamp) # Int64[]

# need latitude and longitude from file - location
file_location = CSV.read("file3.tsv", DataFrame)
select!(file_location, :id => "file_id", :location_id)
location = CSV.read("location.tsv", DataFrame)
select!(location, :id => "location_id", :latitude, :longitude)
ldf = leftjoin(file_location, location, on = :location_id)
select!(ldf, Not([:location_id]))
pdf = leftjoin(pdf, ldf, on = :file_id)
using Dates, TimeZones, JSON3
# functions is_solar_night and is_civil_night from above
s = []
c = []
m = []
for i in 1:nrow(pdf)
    t = ZonedDateTime(String(pdf[i, :timestamp]), "yyyy-mm-dd HH:MM:SSzzzz") |>
        x -> astimezone(x, tz"UTC") |>
        x -> Dates.format(x, "yyyy-mm-ddTHH:MM:SSzzzz")
    # suncalc-cli 2025-03-22T21:02:34.540+00:00 -45.50603 167.48374
    cmd = `suncalc-cli $t $(pdf[i, :latitude]) $(pdf[i, :longitude])`
    json = read(cmd, String) |> x -> JSON3.read(x)
    solar = is_solar_night(t, json.sunTimes.sunrise, json.sunTimes.sunset)
    civil = is_civil_night(t, json.sunTimes.dawn, json.sunTimes.dusk)
    moon = json.moon.phase |> x -> round(x, digits = 2)
    push!(s, solar)
    push!(c, civil)
    push!(m, moon)
    println(i, " ", solar, civil, moon)
end
pdf.is_solar_night = s
pdf.is_civil_night = c
pdf.moon_phase = m
# saved to csv
CSV.write("pomona_labels_20230418.tsv", pdf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

pdf.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.file)]
pdf.created_at = ["2025-04-02T20:53:47.171+00:00" for x in 1:length(pdf.file)]
pdf.last_modified = ["2025-03-31T13:14:50.083+00:00" for x in 1:length(pdf.file)]
pdf.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.file)]
pdf.active = [true for x in 1:length(pdf.file)]
# saved pdf to csv

selection = select(pdf, :id, :file_id, :dataset_id, :start_time, :end_time, :freq_low, :freq_high,
    :description, :storage_prefix, :upload, :approved, :is_solar_night, :is_civil_night, :moon_phase,
    :created_by, :created_at, :last_modified, :modified_by, :active)
CSV.write("selection_pomona_20230418.tsv", selection,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

l_arr = []
lst_arr = []
s_meta = []
for row in eachrow(pdf)
    l_id = nanoid()
    if true in [row.male, row.female, row.duet]
        push!(l_arr, (id = l_id, selection_id = row.id, species_id = "ANhV8iZPfIh8")) # Haast Kiwi
        row.male && push!(lst_arr, (id = nanoid(), label_id = l_id, calltype_id = "dD9cLYtq_G5a")) # M
        row.female && push!(lst_arr, (id = nanoid(), label_id = l_id, calltype_id = "DJ4cFnVHc7NM")) # F
        row.duet && push!(lst_arr, (id = nanoid(), label_id = l_id, calltype_id = "eDZD2ET_Dghm")) # D
        # do selection metadata separately
    end
    x = coalesce(row.other_label, "")
    # Q, Question will go to selection metadata, also Baby Kiwi will go there as well
    occursin("Kiwi", x) && begin
        push!(l_arr, (id = l_id, selection_id = row.id, species_id = "ANhV8iZPfIh8")) # Haast Kiwi
        push!(s_meta, (selection_id = row.id, json = """{"note":"Baby Kiwi maybe?"}"""))
    end
    occursin("Q", x) && push!(s_meta, (selection_id = row.id, json = """{"note":"Question"}"""))
    if row.not_kiwi
        isempty(x) && begin
            push!(l_arr, (id = l_id, selection_id = row.id, species_id = "e-bBi7tasfya")) # Don't Know
            push!(lst_arr, (id = nanoid(), label_id = l_id, calltype_id = "GiUuQbRfmG30")) # Not
        end
        occursin("Morepork", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "OA9X68Mrv4v8"))
        occursin("Kaka", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "MMOLwZw9fVSn"))
        occursin("Don't Know", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "e-bBi7tasfya"))
        occursin("Cuckoo (Long-tailed)", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "rqIcB4EkSy2G"))
        occursin("G", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "YPFfCCNvDuuK")) # Gecko
        occursin("Kea", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "CWYXOm6bKs4V"))
    end
end
species_id="CWYXOm6bKs4V"))endendlabel=DataFrame(l_arr)label.filter_id=["uWoU4s46qSPd" for x in 1:nrow(label)]label.certainty=[100 for x in 1:nrow(label)]label.created_at=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(label)]label.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]label.last_modified=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(label)]label.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]label.active=[true for x in 1:nrow(label)]CSV.write("label_pomona_20230418.tsv", label,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How missing values are writtenappend = false # Whether to append to an existing file)label_subtype=DataFrame(lst_arr)label_subtype.filter_id=["ZIm0-Cs_FOmP" for x in 1:nrow(label_subtype)]label_subtype.certainty=[100 for x in 1:nrow(label_subtype)]label_subtype.created_at=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(label_subtype)]label_subtype.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]label_subtype.last_modified=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(label_subtype)]label_subtype.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]label_subtype.active=[true for x in 1:nrow(label_subtype)]CSV.write("label_subtype_pomona_20230418.tsv", label_subtype,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How missing values are writtenappend = false # Whether to append to an existing file)# s_metaselection_meta=DataFrame(s_meta)selection_meta.created_at=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(selection_meta)]selection_meta.created_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(selection_meta)]selection_meta.last_modified=["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(selection_meta)]selection_meta.modified_by=["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(selection_meta)]CSV.write("selection_metadata_pomona_20230418.tsv", selection_meta,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How missing values are writtenappend = false # Whether to append to an existing file)CSV.write("pomona_labels_20230418.tsv", pdf,delim = '\t', # Tab delimiterheader = true, # Include header (default)quotechar = '"', # Character used for quotingmissingstring = "", # How missing values are writtenappend = false # Whether to append to an existing file)# pomona_labels_20250304COPY pomona_labels_20250304 TO 'pomona_labels_20250304.tsv' (FORMAT 'csv',DELIMITER '\t',HEADER true,QUOTE '"', -- Use double quotes for quotingESCAPE '\', -- Use backslash as escape characterNULL '' -- How NULL values should appear);pdf=CSV.read("pomona_labels_20250304.tsv", DataFrame)pdf.dataset_id=["Yx0oNUDmP5ch" for x in 1:nrow(pdf)] #all pomonapdf.id=[nanoid() for x in 1:length(pdf.file)]#file_id, join on file tablefile=CSV.read("file3.tsv", DataFrame)select!(file, :id, :file_name, :description)file.location = split.(file.description, " ") |> x -> first.(x)select!(file, Not([:description]))rename!(file, :id => :file_id, :file_name => :file)file_dataset=CSV.read("file_dataset.tsv", DataFrame)select!(file_dataset, :file_id, :dataset_id)file=leftjoin(file, file_dataset, on = :file_id)pdf=leftjoin(pdf, file, on = [:location, :file, :dataset_id])select!(pdf, 
select!(pdf, :location, :file, :box, :male, :female, :duet, :not_kiwi, :other_label,
    :id, :file_id, :dataset_id)
findall(ismissing, pdf.file_id) # 83330:83333
deleteat!(pdf, 83330:83333)
CSV.write("pomona_labels_20250304.tsv", pdf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

pdf.box = map(x -> eval(Meta.parse(x)), pdf.box)
pdf.start_time = getindex.(pdf.box, 1) |> x -> ceil.(x, digits = 3)
pdf.end_time = getindex.(pdf.box, 2) |> x -> floor.(x, digits = 3)
pdf.freq_low = [missing for x in 1:nrow(pdf)]
pdf.freq_high = [missing for x in 1:nrow(pdf)]
x = []
for row in eachrow(pdf)
    y = row.location * " " * row.file
    push!(x, y)
end
pdf.description = x
pdf.storage_prefix = [(nanoid(12; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890") * "/" *
                       nanoid(; alphabet = "abcdefghijklmnopqrstuvwxyz1234567890")) for x in 1:length(pdf.file)]
pdf.upload = [true for x in 1:length(pdf.file)]
pdf.approved = [true for x in 1:length(pdf.file)]

# need to join timestamp from file table, have file_id so can join on that
file_time = CSV.read("moth_metadata.tsv", DataFrame)
select!(file_time, :file_id, :timestamp)
pdf = leftjoin(pdf, file_time, on = :file_id)
findall(ismissing, pdf.timestamp) # Int64[]

# need latitude and longitude from file - location
file_location = CSV.read("file3.tsv", DataFrame)
select!(file_location, :id => "file_id", :location_id)
location = CSV.read("location.tsv", DataFrame)
select!(location, :id => "location_id", :latitude, :longitude)
ldf = leftjoin(file_location, location, on = :location_id)
select!(ldf, Not([:location_id]))
pdf = leftjoin(pdf, ldf, on = :file_id)

using Dates, TimeZones, JSON3
# functions is_solar_night and is_civil_night from above
s = []
c = []
m = []
for i in 1:nrow(pdf)
    t = ZonedDateTime(String(pdf[i, :timestamp]), "yyyy-mm-dd HH:MM:SSzzzz") |>
        x -> astimezone(x, tz"UTC") |>
        x -> Dates.format(x, "yyyy-mm-ddTHH:MM:SSzzzz")
    # suncalc-cli 2025-03-22T21:02:34.540+00:00 -45.50603 167.48374
    cmd = `suncalc-cli $t $(pdf[i, :latitude]) $(pdf[i, :longitude])`
    json = read(cmd, String) |> x -> JSON3.read(x)
    solar = is_solar_night(t, json.sunTimes.sunrise, json.sunTimes.sunset)
    civil = is_civil_night(t, json.sunTimes.dawn, json.sunTimes.dusk)
    moon = json.moon.phase |> x -> round(x, digits = 2)
    push!(s, solar)
    push!(c, civil)
    push!(m, moon)
    println(i, " ", solar, civil, moon)
end
pdf.is_solar_night = s
pdf.is_civil_night = c
pdf.moon_phase = m
# saved to csv
CSV.write("pomona_labels_20250304.tsv", pdf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

pdf.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.file)]
pdf.created_at = ["2025-04-04T17:57:30.649+00:00" for x in 1:length(pdf.file)]
pdf.last_modified = ["2025-04-04T17:57:30.649+00:00" for x in 1:length(pdf.file)]
pdf.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:length(pdf.file)]
pdf.active = [true for x in 1:length(pdf.file)]
# saved pdf to csv

selection = select(pdf, :id, :file_id, :dataset_id, :start_time, :end_time, :freq_low, :freq_high,
    :description, :storage_prefix, :upload, :approved, :is_solar_night, :is_civil_night, :moon_phase,
    :created_by, :created_at, :last_modified, :modified_by, :active)
CSV.write("selection_pomona_20250304.tsv", selection,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
l_arr = []
lst_arr = []
s_meta = []
for row in eachrow(pdf)
    l_id = nanoid()
    if true in [row.male, row.female, row.duet]
        push!(l_arr, (id = l_id, selection_id = row.id, species_id = "ANhV8iZPfIh8")) # Haast Kiwi
        row.male && push!(lst_arr, (id = nanoid(), label_id = l_id, calltype_id = "dD9cLYtq_G5a")) # M
        row.female && push!(lst_arr, (id = nanoid(), label_id = l_id, calltype_id = "DJ4cFnVHc7NM")) # F
        row.duet && push!(lst_arr, (id = nanoid(), label_id = l_id, calltype_id = "eDZD2ET_Dghm")) # D
        # do selection metadata separately
    end
    x = coalesce(row.other_label, "")
    # Q, Question will go to selection metadata, also Baby Kiwi will go there as well
    occursin("Kiwi", x) && begin # Maybe Baby Kiwi
        push!(l_arr, (id = l_id, selection_id = row.id, species_id = "ANhV8iZPfIh8")) # Haast Kiwi
        push!(s_meta, (selection_id = row.id, json = """{"note":"Baby Kiwi maybe?"}"""))
    end
    occursin("Q", x) && push!(s_meta, (selection_id = row.id, json = """{"note":"Question"}"""))
    if row.not_kiwi
        isempty(x) && begin
            push!(l_arr, (id = l_id, selection_id = row.id, species_id = "e-bBi7tasfya")) # Don't Know
            push!(lst_arr, (id = nanoid(), label_id = l_id, calltype_id = "GiUuQbRfmG30")) # Not
        end
        occursin("Morepork", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "OA9X68Mrv4v8"))
        occursin("Kaka", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "MMOLwZw9fVSn"))
        occursin("Don't Know", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "e-bBi7tasfya"))
        occursin("Cuckoo (Long-tailed)", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "rqIcB4EkSy2G"))
        occursin("G", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "YPFfCCNvDuuK")) # Gecko
        occursin("Kea", x) && push!(l_arr, (id = l_id, selection_id = row.id, species_id = "CWYXOm6bKs4V"))
    end
end

label = DataFrame(l_arr)
label.filter_id = ["uWoU4s46qSPd" for x in 1:nrow(label)]
label.certainty = [100 for x in 1:nrow(label)]
label.created_at = ["2025-04-04T18:08:30.111+00:00" for x in 1:nrow(label)]
label.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]
label.last_modified = ["2025-04-04T18:08:30.111+00:00" for x in 1:nrow(label)]
label.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label)]
label.active = [true for x in 1:nrow(label)]
CSV.write("label_pomona_20250304.tsv", label,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

label_subtype = DataFrame(lst_arr)
label_subtype.filter_id = ["ZIm0-Cs_FOmP" for x in 1:nrow(label_subtype)]
label_subtype.certainty = [100 for x in 1:nrow(label_subtype)]
label_subtype.created_at = ["2025-04-04T18:08:30.111+00:00" for x in 1:nrow(label_subtype)]
label_subtype.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]
label_subtype.last_modified = ["2025-04-04T18:08:30.111+00:00" for x in 1:nrow(label_subtype)]
label_subtype.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(label_subtype)]
label_subtype.active = [true for x in 1:nrow(label_subtype)]
CSV.write("label_subtype_pomona_20250304.tsv", label_subtype,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

# s_meta
selection_meta = DataFrame(s_meta)
selection_meta.created_at = ["2025-04-04T02:51:11.257+00:00" for x in 1:nrow(selection_meta)]
selection_meta.created_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(selection_meta)]
selection_meta.last_modified = ["2025-04-04T18:08:30.111+00:00" for x in 1:nrow(selection_meta)]
selection_meta.modified_by = ["kp_8663c3b3d4654734874b17d846bbe52d" for x in 1:nrow(selection_meta)]
CSV.write("selection_metadata_pomona_20250304.tsv", selection_meta,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)
CSV.write("pomona_labels_20250304.tsv", pdf,
    delim = '\t', header = true, quotechar = '"', missingstring = "", append = false)

# make copy of postgres to duckdb
duckdb skraak.duckdb
INSTALL postgres; -- only run first time
LOAD postgres;
-- Attach the PostgreSQL database using the given connection string
ATTACH 'host=localhost port=5432 dbname=mydb' AS test (TYPE postgres);
-- The table "tbl_name" can now be queried as if it is a regular table
SELECT * FROM test.tbl_name;
-- Switch the active database to "test"
USE test;
-- List all tables in the file
SHOW TABLES;
-- copy whole pg db to duckdb
COPY FROM DATABASE postgres_db TO my_duckdb_db;
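The is_solar_night / is_civil_night helpers used in the loops above are defined earlier in these notes, and the schema notes further down suggest computing the same night flags on the client with suncalc.js. As a reference only, a minimal TypeScript sketch of equivalent predicates: the suncalc getTimes/getMoonIllumination calls are the library's real API, but the helper names and the simple before-sunrise/after-sunset boundary handling are my assumptions, not the original implementations.

import SunCalc from "suncalc";

// True when t falls outside [sunrise, sunset] at the given coordinates.
function isSolarNight(t: Date, lat: number, lon: number): boolean {
  const { sunrise, sunset } = SunCalc.getTimes(t, lat, lon);
  return t.getTime() < sunrise.getTime() || t.getTime() > sunset.getTime();
}

// True when t falls outside [dawn, dusk] (civil twilight bounds).
function isCivilNight(t: Date, lat: number, lon: number): boolean {
  const { dawn, dusk } = SunCalc.getTimes(t, lat, lon);
  return t.getTime() < dawn.getTime() || t.getTime() > dusk.getTime();
}

// Moon phase as 0.00-1.00, rounded to 2 dp like the Julia loop does.
function moonPhase(t: Date): number {
  return Math.round(SunCalc.getMoonIllumination(t).phase * 100) / 100;
}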
// Drizzle handles materialized views differently, excluded the materialized view
// `ebird_taxonomy_v2024` from this conversion. Implement that at the query level
// in application code (see the sketch after this schema).
// CHECK constraints not implemented in drizzle (one application-level option is
// sketched after the location table below):
//   location table
//   CONSTRAINT latitude_range CHECK (latitude BETWEEN -90.0 AND 90.0)
//   CONSTRAINT longitude_range CHECK (longitude BETWEEN -180.0 AND 180.0)
// GIN indexes for JSONB not implemented in drizzle:
//   INDEX idx_file_metadata_json_gin ON file_metadata USING gin(json)

import { pgEnum, pgTable, varchar, boolean, timestamp, decimal, integer, json, primaryKey, index, unique, foreignKey, uniqueIndex, bigint } from "drizzle-orm/pg-core";
import { sql } from "drizzle-orm";

// Enum types
export const datasetTypeEnum = pgEnum("dataset_type", ["organise", "test", "train"]);
export const gainLevelEnum = pgEnum("gain_level", ["low", "low-medium", "medium", "medium-high", "high"]);
export const roleNameEnum = pgEnum("role_name", ["ADMIN", "USER", "CURATOR"]);
export const permissionNameEnum = pgEnum("permission_name", ["READ", "UPLOAD", "DOWNLOAD", "EDIT", "DELETE"]);

// Dataset Table
export const dataset = pgTable("dataset", {
  id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)
  name: varchar("name", { length: 255 }).notNull(),
  public: boolean("public").default(false), // Whether dataset is publicly accessible
  description: varchar("description", { length: 255 }),
  createdBy: varchar("created_by", { length: 64 }).notNull(), // kinde user id
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(), // kinde user id
  owner: varchar("owner", { length: 64 }).notNull(), // kinde user id
  active: boolean("active").default(true),
  type: datasetTypeEnum("type").notNull().default("organise"),
}, (table) => ({
  nameIdx: index("idx_dataset_name").on(table.name),
  ownerIdx: index("idx_dataset_owner").on(table.owner),
  activeIdx: index("idx_dataset_active").on(table.active),
  publicIdx: index("idx_dataset_public").on(table.public),
}));

// Locations Table
export const location = pgTable("location", {
  id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)
  datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
  name: varchar("name", { length: 140 }).notNull(),
  latitude: decimal("latitude", { precision: 10, scale: 7 }), // -45.5027
  longitude: decimal("longitude", { precision: 10, scale: 7 }), // 167.48406
  description: varchar("description", { length: 255 }),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
  active: boolean("active").default(true),
}, (table) => ({
  nameIdx: index("idx_location_name").on(table.name),
  datasetIdx: index("idx_location_dataset").on(table.datasetId),
  activeIdx: index("idx_location_active").on(table.active),
  datasetActiveIdx: index("idx_location_dataset_active").on(table.datasetId, table.active),
}));
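Since the header note says the lat/long CHECK constraints are not carried into this drizzle schema, one option is to enforce the ranges at the application layer before inserting. A minimal sketch; assertCoordinateRanges is a hypothetical helper, not part of the schema or any library.

// Mirrors the SQL CHECK constraints that drizzle does not express here.
export function assertCoordinateRanges(latitude: number, longitude: number): void {
  // CONSTRAINT latitude_range CHECK (latitude BETWEEN -90.0 AND 90.0)
  if (latitude < -90 || latitude > 90) {
    throw new RangeError(`latitude out of range: ${latitude}`);
  }
  // CONSTRAINT longitude_range CHECK (longitude BETWEEN -180.0 AND 180.0)
  if (longitude < -180 || longitude > 180) {
    throw new RangeError(`longitude out of range: ${longitude}`);
  }
}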
timestamp("last_modified", { withTimezone: true }).defaultNow(),modifiedBy: varchar("modified_by", { length: 64 }).notNull(),});// Cluster Tableexport const cluster = pgTable("cluster", {id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),locationId: varchar("location_id", { length: 12 }).notNull().references(() => location.id),name: varchar("name", { length: 140 }).notNull(),description: varchar("description", { length: 255 }),createdBy: varchar("created_by", { length: 64 }).notNull(),createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),modifiedBy: varchar("modified_by", { length: 64 }).notNull(),active: boolean("active").default(true),timezoneId: varchar("timezone_id", { length: 40 }),cyclicRecordingPatternId: varchar("cyclic_recording_pattern_id", { length: 12 }).references(() => cyclicRecordingPattern.id),}, (table) => ({datasetIdx: index("idx_cluster_dataset").on(table.datasetId),activeIdx: index("idx_cluster_active").on(table.active),datasetActiveIdx: index("idx_cluster_dataset_active").on(table.datasetId, table.active),recordingPatternIdx: index("idx_cluster_recording_pattern").on(table.cyclicRecordingPatternId),}));// Files Tableexport const file = pgTable("file", {id: varchar("id", { length: 21 }).primaryKey(), // nanoidfileName: varchar("file_name", { length: 255 }).notNull(),path: varchar("path", { length: 255 }),xxh64Hash: varchar("xxh64_hash", { length: 16 }).notNull(),locationId: varchar("location_id", { length: 12 }).notNull().references(() => location.id),timestampLocal: timestamp("timestamp_local", { withTimezone: true }).notNull(),clusterId: varchar("cluster_id", { length: 12 }).references(() => cluster.id),duration: decimal("duration", { precision: 7, scale: 3 }).notNull(),sampleRate: integer("sample_rate").notNull(),description: varchar("description", { length: 255 }),storagePrefix: varchar("storage_prefix", { length: 34 }).notNull(),upload: boolean("upload").default(true),maybeSolarNight: boolean("maybe_solar_night"),maybeCivilNight: boolean("maybe_civil_night"),moonPhase: decimal("moon_phase", { precision: 3, scale: 2 }),createdBy: varchar("created_by", { length: 64 }).notNull(),createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),modifiedBy: varchar("modified_by", { length: 64 }).notNull(),active: boolean("active").default(true),}, (table) => ({locationIdx: index("idx_file_location").on(table.locationId),activeIdx: index("idx_file_active").on(table.active),timestampLocalIdx: index("idx_file_timestamp_local").on(table.timestampLocal),clusterIdx: index("idx_file_cluster").on(table.clusterId),maybeSolarNightIdx: index("idx_file_maybe_solar_night").on(table.maybeSolarNight),maybeCivilNightIdx: index("idx_file_maybe_civil_night").on(table.maybeCivilNight),}));// Moth Metadata Tableexport const mothMetadata = pgTable("moth_metadata", {fileId: varchar("file_id", { length: 21 }).primaryKey().references(() => file.id),timestamp: timestamp("timestamp", { withTimezone: true }).notNull(),recorderId: varchar("recorder_id", { length: 16 }),gain: gainLevelEnum("gain"),batteryV: decimal("battery_v", { precision: 2, scale: 1 }),tempC: decimal("temp_c", { precision: 3, scale: 1 }),createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),createdBy: varchar("created_by", { length: 64 
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
});

// File Metadata Table
export const fileMetadata = pgTable("file_metadata", {
  fileId: varchar("file_id", { length: 21 }).primaryKey().references(() => file.id),
  json: json("json"),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
}, (table) => ({
  jsonIdx: index("idx_file_metadata_json").on(table.json),
}));

// Junction Table for Files to Dataset
export const fileDataset = pgTable("file_dataset", {
  fileId: varchar("file_id", { length: 21 }).notNull().references(() => file.id),
  datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
}, (table) => ({
  pk: primaryKey({ columns: [table.fileId, table.datasetId] }),
  fileIdx: index("idx_file_dataset_file").on(table.fileId),
  datasetIdx: index("idx_file_dataset_dataset").on(table.datasetId),
}));

// Selection Table
export const selection = pgTable("selection", {
  id: varchar("id", { length: 21 }).primaryKey(), // nanoid
  fileId: varchar("file_id", { length: 21 }).notNull().references(() => file.id),
  datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
  startTime: decimal("start_time", { precision: 7, scale: 3 }).notNull(),
  endTime: decimal("end_time", { precision: 7, scale: 3 }).notNull(),
  freqLow: decimal("freq_low", { precision: 9, scale: 3 }),
  freqHigh: decimal("freq_high", { precision: 9, scale: 3 }),
  description: varchar("description", { length: 255 }),
  storagePrefix: varchar("storage_prefix", { length: 34 }).notNull(),
  upload: boolean("upload").default(true),
  approved: boolean("approved").default(false),
  isSolarNight: boolean("is_solar_night"),
  isCivilNight: boolean("is_civil_night"),
  moonPhase: decimal("moon_phase", { precision: 3, scale: 2 }),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
  active: boolean("active").default(true),
}, (table) => ({
  fileIdx: index("idx_selection_file").on(table.fileId),
  datasetIdx: index("idx_selection_dataset").on(table.datasetId),
  activeIdx: index("idx_selection_active").on(table.active),
  approvedIdx: index("idx_selection_approved").on(table.approved),
  isSolarNightIdx: index("idx_selection_is_solar_night").on(table.isSolarNight),
  isCivilNightIdx: index("idx_selection_is_civil_night").on(table.isCivilNight),
  moonPhaseIdx: index("idx_selection_moon_phase").on(table.moonPhase),
  // Adding the foreign key constraint to fileDataset
  fkFileDataset: foreignKey({
    columns: [table.fileId, table.datasetId],
    foreignColumns: [fileDataset.fileId, fileDataset.datasetId],
    name: "fk_selection_file_dataset",
  }),
}));

// Selection Metadata Table
export const selectionMetadata = pgTable("selection_metadata", {
  selectionId: varchar("selection_id", { length: 21 }).primaryKey().references(() => selection.id),
  json: json("json"),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
}, (table) => ({
  jsonIdx: index("idx_selection_metadata_json").on(table.json),
}));

// eBird Taxonomy Table
export const ebirdTaxonomy = pgTable("ebird_taxonomy", {
  id: varchar("id", { length: 12 }).primaryKey(),
  taxonomyVersion: varchar("taxonomy_version", { length: 4 }).notNull(),
  taxonOrder: integer("taxon_order").notNull(),
  category: varchar("category", { length: 15 }).notNull(),
  speciesCode: varchar("species_code", { length: 15 }).notNull(),
  taxonConceptId: varchar("taxon_concept_id", { length: 15 }),
  primaryComName: varchar("primary_com_name", { length: 100 }).notNull(),
  sciName: varchar("sci_name", { length: 100 }).notNull(),
  birdOrder: varchar("bird_order", { length: 30 }),
  family: varchar("family", { length: 100 }),
  speciesGroup: varchar("species_group", { length: 100 }),
  reportAs: varchar("report_as", { length: 15 }),
  validFrom: timestamp("valid_from").notNull(),
  validTo: timestamp("valid_to"),
}, (table) => ({
  speciesCodeVersionUnique: unique().on(table.speciesCode, table.taxonomyVersion),
}));

// Species Table
export const species = pgTable("species", {
  id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)
  label: varchar("label", { length: 100 }).notNull(),
  ebirdCode: varchar("ebird_code", { length: 12 }),
  taxonomyVersion: varchar("taxonomy_version", { length: 4 }),
  description: varchar("description", { length: 255 }),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
  active: boolean("active").default(true),
  owner: varchar("owner", { length: 64 }).notNull(),
}, (table) => ({
  labelIdx: index("idx_species_label").on(table.label),
  ebirdIdx: index("idx_species_ebird").on(table.ebirdCode),
  // Foreign key reference to ebird_taxonomy
  ebirdTaxonomyRef: foreignKey({
    columns: [table.ebirdCode, table.taxonomyVersion],
    foreignColumns: [ebirdTaxonomy.speciesCode, ebirdTaxonomy.taxonomyVersion],
  }),
}));

// Call Types Table
export const callType = pgTable("call_type", {
  id: varchar("id", { length: 12 }).primaryKey(), // nanoid(12)
  speciesId: varchar("species_id", { length: 12 }).notNull().references(() => species.id),
  label: varchar("label", { length: 100 }).notNull(),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
  active: boolean("active").default(true),
}, (table) => ({
  speciesIdx: index("idx_call_type_species").on(table.speciesId),
  labelIdx: index("idx_call_type_label").on(table.label),
}));

// Filter Table
export const filter = pgTable("filter", {
  id: varchar("id", { length: 12 }).primaryKey(), // nanoid
  name: varchar("name", { length: 140 }).notNull(),
  description: varchar("description", { length: 255 }),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
  owner: varchar("owner", { length: 64 }).notNull(),
  active: boolean("active").default(true),
});
// Label Table
export const label = pgTable("label", {
  id: varchar("id", { length: 21 }).primaryKey(), // nanoid
  selectionId: varchar("selection_id", { length: 21 }).notNull().references(() => selection.id),
  speciesId: varchar("species_id", { length: 12 }).notNull().references(() => species.id),
  filterId: varchar("filter_id", { length: 12 }).references(() => filter.id),
  certainty: decimal("certainty", { precision: 5, scale: 2 }),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
  active: boolean("active").default(true),
}, (table) => ({
  selectionIdIdx: index("idx_label_selection_id").on(table.selectionId),
  speciesIdIdx: index("idx_label_species_id").on(table.speciesId),
}));

// Label Sub-type Table
export const labelSubtype = pgTable("label_subtype", {
  id: varchar("id", { length: 21 }).primaryKey(), // nanoid
  labelId: varchar("label_id", { length: 21 }).notNull().references(() => label.id),
  calltypeId: varchar("calltype_id", { length: 12 }).notNull().references(() => callType.id),
  filterId: varchar("filter_id", { length: 12 }).references(() => filter.id),
  certainty: decimal("certainty", { precision: 5, scale: 2 }),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
  active: boolean("active").default(true),
});

// File Sizes Table
export const fileSize = pgTable("file_size", {
  fileId: varchar("file_id", { length: 21 }).primaryKey().references(() => file.id),
  fileSize: bigint("file_size", { mode: "number" }).notNull(),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
}, (table) => ({
  fileIdx: index("idx_file_size_file").on(table.fileId),
}));

// Species to Dataset junction table
export const speciesDataset = pgTable("species_dataset", {
  speciesId: varchar("species_id", { length: 12 }).notNull().references(() => species.id),
  datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
}, (table) => ({
  pk: primaryKey({ columns: [table.speciesId, table.datasetId] }),
  speciesIdx: index("idx_species_dataset_species").on(table.speciesId),
  datasetIdx: index("idx_species_dataset_dataset").on(table.datasetId),
}));

// Access Grant Table
export const accessGrant = pgTable("access_grant", {
  id: varchar("id", { length: 12 }).primaryKey(), // nanoid()
  datasetId: varchar("dataset_id", { length: 12 }).notNull().references(() => dataset.id),
  role: roleNameEnum("role").notNull(),
  permission: permissionNameEnum("permission").notNull(),
  userId: varchar("user_id", { length: 64 }),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
  active: boolean("active").default(true),
}, (table) => ({
  uniqueAccessGrant: unique().on(table.datasetId, table.role, table.permission, table.userId),
  uniqueRolePermission: uniqueIndex("idx_unique_role_permission")
    .on(table.datasetId, table.role, table.permission)
    .where(sql`user_id IS NULL`),
  datasetIdx: index("idx_access_grant_dataset").on(table.datasetId),
  roleIdx: index("idx_access_grant_role").on(table.role),
  permissionIdx: index("idx_access_grant_permission").on(table.permission),
  userIdx: index("idx_access_grant_user").on(table.userId),
  activeIdx: index("idx_access_grant_active").on(table.active),
}));

// User Role Table
export const userRole = pgTable("user_role", {
  userId: varchar("user_id", { length: 64 }).primaryKey(),
  role: roleNameEnum("role").notNull().default("USER"),
  createdAt: timestamp("created_at", { withTimezone: true }).defaultNow(),
  createdBy: varchar("created_by", { length: 64 }).notNull(),
  lastModified: timestamp("last_modified", { withTimezone: true }).defaultNow(),
  modifiedBy: varchar("modified_by", { length: 64 }).notNull(),
}, (table) => ({
  userRoleIdx: index("idx_user_role").on(table.userId),
}));
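Per the note at the top of this schema, the ebird_taxonomy_v2024 materialized view is excluded from the drizzle conversion and handled at the query level. A minimal sketch of what that could look like, assuming a configured drizzle instance (db) like the one used elsewhere in these notes; searchTaxonomy is a hypothetical helper, and the view plus its idx_ebird_name_search GIN index come from the SQL schema below.

import { sql } from "drizzle-orm";

// The materialized view has no drizzle table object, so fall back to raw SQL.
// db: any drizzle database instance; driver-specific type omitted in this sketch.
export async function searchTaxonomy(db: any, query: string) {
  return db.execute(sql`
    SELECT species_code, primary_com_name, sci_name
    FROM ebird_taxonomy_v2024
    WHERE to_tsvector('english', primary_com_name || ' ' || sci_name)
          @@ plainto_tsquery('english', ${query})
  `);
}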
-- NOTE: DBML does not like functions and materialised views
-- sql2dbml schema.sql --postgres -o schema.dbml
-- no need to keep a storage url for files, pattern is $prefix/$file_id.opus,
-- this way can change provider anytime.
-- store files and selections on aws, to train models
-- 1 bucket for files, 1 bucket for selections, random 12 digit prefix to filename
-- save files as opus
-- save selections as wav to prevent over compression
-- junction table for files-dataset relationship
--   (file can be a member of many datasets)
--   (selections apply only to 1 dataset)
-- use suncalc.js
-- could use a function for night, on client not db, but want to filter on night
-- in file table use mid point of file as time
-- all times must be zoned to utc

-- dataset type enum
CREATE TYPE dataset_type AS ENUM ('organise', 'test', 'train');

-- Dataset Table
-- Add type column to the dataset table, so that I do not ever mix testing data with training data.
CREATE TABLE dataset (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    name VARCHAR(255) NOT NULL,
    public BOOLEAN DEFAULT FALSE, -- Whether dataset is publicly accessible
    description VARCHAR(255),
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    owner VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    type dataset_type NOT NULL DEFAULT 'organise'
);
CREATE INDEX idx_dataset_name ON dataset(name);
CREATE INDEX idx_dataset_owner ON dataset(owner);
CREATE INDEX idx_dataset_active ON dataset(active);
CREATE INDEX idx_dataset_public ON dataset(public);

--------------------------------
-- 2025-04-09 07:41:08.35534+00 just to test out
--------------------------------
-- 2025-04-09 07:43:45.115101+00 data

-- Locations Table
CREATE TABLE location (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
    name VARCHAR(140) NOT NULL,
    latitude DECIMAL(10, 7) CHECK (latitude BETWEEN -90.0 AND 90.0), -- -45.5027
    longitude DECIMAL(10, 7) CHECK (longitude BETWEEN -180.0 AND 180.0), -- 167.48406
    description VARCHAR(255), -- Limited to 255 characters for efficiency
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
CREATE INDEX idx_location_name ON location(name);
CREATE INDEX idx_location_dataset ON location(dataset_id); -- ??
CREATE INDEX idx_location_active ON location(active); -- ??
CREATE INDEX idx_location_dataset_active ON location(dataset_id, active);

-- Add recording pattern, mainly so it can be searchable,
-- this is an optional field, audio moth needs this, to help
-- with searching and filtering
-- i have 24/7: 1 in 30 and 1 in 40
CREATE TABLE cyclic_recording_pattern (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    record_s INTEGER NOT NULL,
    sleep_s INTEGER NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL
);
-- Cluster Table (think of a cluster of files as all the files on 1 SD Card)
-- a statistical unit with no major time gaps, to enable call rate stats
-- See changes below, added foreign key on location, added recording pattern
-- added timezone_id for iana timezone id
CREATE TABLE cluster (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
    location_id VARCHAR(12) NOT NULL, -- A cluster must have a location, as well as a dataset
    name VARCHAR(140) NOT NULL,
    description VARCHAR(255), -- Limited to 255 characters for efficiency
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    timezone_id VARCHAR(40), -- not required as may need to auto generate cluster, can't always ask or check
    cyclic_recording_pattern_id VARCHAR(12),
    FOREIGN KEY (dataset_id) REFERENCES dataset(id),
    FOREIGN KEY (location_id) REFERENCES location(id),
    FOREIGN KEY (cyclic_recording_pattern_id) REFERENCES cyclic_recording_pattern(id)
);
CREATE INDEX idx_cluster_dataset ON cluster(dataset_id);
CREATE INDEX idx_cluster_active ON cluster(active);
CREATE INDEX idx_cluster_dataset_active ON cluster(dataset_id, active);
CREATE INDEX idx_cluster_recording_pattern ON cluster(cyclic_recording_pattern_id);

-- values in my data are medium and medium-high
CREATE TYPE gain_level AS ENUM ('low', 'low-medium', 'medium', 'medium-high', 'high');

-- Files Table (removed dataset_id as now use junction table)
-- Note: timestamp_local should reflect local time, not timestamp in filename, this is important
-- duration must not have more than 3 decimal places, check in ui
CREATE TABLE file (
    id VARCHAR(21) PRIMARY KEY, -- nanoid
    file_name VARCHAR(255) NOT NULL,
    path VARCHAR(255) NULL, -- optional hint for local access
    xxh64_hash VARCHAR(16) NOT NULL, -- hash of original file cbe675a69a5fef1c
    location_id VARCHAR(12) NOT NULL, -- nanoid, from locations table
    timestamp_local TIMESTAMP WITH TIME ZONE NOT NULL, -- parsed from filename, adjust for daylight saving
    cluster_id VARCHAR(12), -- nanoid(12), optional if imported one by one
    duration DECIMAL(7, 3) NOT NULL CHECK (duration > 0), -- in seconds, allowing for millisecond precision (9999.999s)
    sample_rate INTEGER NOT NULL,
    description VARCHAR(255), -- Limited to 255 characters for efficiency
    storage_prefix VARCHAR(34) NOT NULL, -- prefix to compressed copy in object storage, nanoid(3) customAlphabet
    upload BOOLEAN DEFAULT TRUE,
    maybe_solar_night BOOLEAN, -- calculate with function on client. this is a more accurate value to file table
    maybe_civil_night BOOLEAN, -- calculate with function on client. this is a more accurate value to file table
    moon_phase DECIMAL(3,2) CHECK (moon_phase BETWEEN 0.00 AND 1.00), -- 0.00 to 1.00 (new moon to full moon)
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (location_id) REFERENCES location(id),
    FOREIGN KEY (cluster_id) REFERENCES cluster(id)
);
CREATE INDEX idx_file_location ON file(location_id);
CREATE INDEX idx_file_active ON file(active);
CREATE INDEX idx_file_timestamp_local ON file(timestamp_local);
CREATE INDEX idx_file_cluster ON file(cluster_id);
CREATE INDEX idx_file_maybe_solar_night ON file(maybe_solar_night);
CREATE INDEX idx_file_maybe_civil_night ON file(maybe_civil_night);

CREATE TABLE moth_metadata (
    file_id VARCHAR(21) PRIMARY KEY,
    timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
    recorder_id VARCHAR(16), -- 24F31901603710CD (16)
    gain gain_level NULL, -- low, medium, high or null
    battery_v DECIMAL(2, 1) CHECK (battery_v >= 0), -- for values from 0 to 9.9
    temp_c DECIMAL(3, 1), -- e.g., 24.2
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    FOREIGN KEY (file_id) REFERENCES file(id)
);

CREATE TABLE file_metadata (
    file_id VARCHAR(21) PRIMARY KEY,
    json JSONB, -- For noise levels and other file-level metadata
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    FOREIGN KEY (file_id) REFERENCES file(id)
);
CREATE INDEX idx_file_metadata_json ON file_metadata USING gin(json);

-- Junction Table for Files to Dataset (many-to-many)
CREATE TABLE file_dataset (
    file_id VARCHAR(21) NOT NULL,
    dataset_id VARCHAR(12) NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    PRIMARY KEY (file_id, dataset_id),
    FOREIGN KEY (file_id) REFERENCES file(id),
    FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
-- indexes for the junction table
CREATE INDEX idx_file_dataset_file ON file_dataset(file_id);
CREATE INDEX idx_file_dataset_dataset ON file_dataset(dataset_id);

-- Selection Table
CREATE TABLE selection (
    id VARCHAR(21) PRIMARY KEY, -- nanoid
    file_id VARCHAR(21) NOT NULL, -- nanoid
    dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
    start_time DECIMAL(7,3) NOT NULL, -- up to 9999.999 seconds
    end_time DECIMAL(7,3) NOT NULL, -- up to 9999.999 seconds
    freq_low DECIMAL(9,3) CHECK (freq_low < 300000), -- LOOK AT CHECK
    freq_high DECIMAL(9,3) CHECK (freq_high < 300000), -- LOOK AT CHECK
    description VARCHAR(255), -- Limited to 255 characters for efficiency
    storage_prefix VARCHAR(34) NOT NULL, -- prefix to compressed copy in object storage, nanoid customAlphabet
    upload BOOLEAN DEFAULT TRUE,
    approved BOOLEAN DEFAULT FALSE, -- Approval status by dataset owner
    is_solar_night BOOLEAN, -- calculate with function on client. this is a more accurate value to file table
    is_civil_night BOOLEAN, -- calculate with function on client. this is a more accurate value to file table
    moon_phase DECIMAL(3,2) CHECK (moon_phase BETWEEN 0.00 AND 1.00), -- 0.00 to 1.00 (new moon to full moon)
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (file_id) REFERENCES file(id),
    FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
CREATE INDEX idx_selection_file ON selection(file_id);
CREATE INDEX idx_selection_dataset ON selection(dataset_id);
CREATE INDEX idx_selection_active ON selection(active);
CREATE INDEX idx_selection_approved ON selection(approved);
CREATE INDEX idx_selection_is_solar_night ON selection(is_solar_night);
CREATE INDEX idx_selection_is_civil_night ON selection(is_civil_night);
CREATE INDEX idx_selection_moon_phase ON selection(moon_phase);

CREATE TABLE selection_metadata (
    selection_id VARCHAR(21) PRIMARY KEY,
    json JSONB, -- for loudness, noise, and other selection-level metadata
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    FOREIGN KEY (selection_id) REFERENCES selection(id)
);
CREATE INDEX idx_selection_metadata_json ON selection_metadata USING gin(json);

-- eBird Taxonomy Table
-- will need to update INDEX too when introducing a new version
-- see working with ebird taxonomies, aichat, deepseek, macbook
-- see materialised view and index on it
-- see alter table stuff below, modifications
CREATE TABLE ebird_taxonomy (
    id VARCHAR(12) PRIMARY KEY,
    taxonomy_version VARCHAR(4) NOT NULL,
    taxon_order INTEGER NOT NULL,
    category VARCHAR(15) NOT NULL,
    species_code VARCHAR(15) NOT NULL,
    taxon_concept_id VARCHAR(15),
    primary_com_name VARCHAR(100) NOT NULL,
    sci_name VARCHAR(100) NOT NULL,
    bird_order VARCHAR(30),
    family VARCHAR(100),
    species_group VARCHAR(100),
    report_as VARCHAR(15),
    valid_from DATE NOT NULL,
    valid_to DATE,
    UNIQUE (species_code, taxonomy_version)
);

-- Species Table (mutable), removed link to dataset_id and added owner field
CREATE TABLE species (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    label VARCHAR(100) NOT NULL, -- display label for the species
    ebird_code VARCHAR(12), -- link to ebird taxonomy
    taxonomy_version VARCHAR(4),
    description VARCHAR(255),
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    owner VARCHAR(64) NOT NULL,
    FOREIGN KEY (ebird_code, taxonomy_version) REFERENCES ebird_taxonomy(species_code, taxonomy_version)
);
CREATE INDEX idx_species_label ON species(label);
CREATE INDEX idx_species_ebird ON species(ebird_code);

-- Call Types Table (mutable)
CREATE TABLE call_type (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    species_id VARCHAR(12) NOT NULL, -- link to parent species
    label VARCHAR(100) NOT NULL, -- display name like "male", "female", "duet"
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (species_id) REFERENCES species(id)
);
CREATE INDEX idx_call_type_species ON call_type(species_id);
CREATE INDEX idx_call_type_label ON call_type(label);
CREATE TABLE filter (
    id VARCHAR(12) PRIMARY KEY, -- nanoid
    name VARCHAR(140) NOT NULL,
    description VARCHAR(255),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    owner VARCHAR(64) NOT NULL,
    active BOOLEAN NOT NULL DEFAULT true
);

-- Label Table, many to 1 relationship with selection
CREATE TABLE label (
    id VARCHAR(21) PRIMARY KEY, -- nanoid
    selection_id VARCHAR(21) NOT NULL, -- link to selection table
    species_id VARCHAR(12) NOT NULL, -- link to species table
    filter_id VARCHAR(12),
    certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN NOT NULL DEFAULT true,
    FOREIGN KEY (selection_id) REFERENCES selection(id),
    FOREIGN KEY (species_id) REFERENCES species(id),
    FOREIGN KEY (filter_id) REFERENCES filter(id)
);
CREATE INDEX idx_label_selection_id ON label(selection_id);
CREATE INDEX idx_label_species_id ON label(species_id);

-- Label Sub-type Table (optional 1:1 relationship with label)
CREATE TABLE label_subtype (
    id VARCHAR(21) PRIMARY KEY, -- nanoid
    label_id VARCHAR(21) NOT NULL, -- link to parent label
    calltype_id VARCHAR(12) NOT NULL, -- link to call_type table
    filter_id VARCHAR(12),
    certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN NOT NULL DEFAULT true,
    FOREIGN KEY (label_id) REFERENCES label(id),
    FOREIGN KEY (calltype_id) REFERENCES call_type(id),
    FOREIGN KEY (filter_id) REFERENCES filter(id)
    -- UNIQUE (label_id) -- ensures 1:1 relationship with label.
    -- how do i handle multiple call type filters? needs 1:many relation
);
-- File Sizes Table (populated by batch job querying AWS) (mutable)
CREATE TABLE file_size (
    file_id VARCHAR(21) PRIMARY KEY, -- nanoid, matching the files table
    file_size BIGINT NOT NULL, -- size in bytes (using BIGINT just in case files > 2GB)
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (file_id) REFERENCES file(id)
);
CREATE INDEX idx_file_size_file ON file_size(file_id);

-- file_dataset must exist first
-- Referential Integrity for Selections
-- To ensure `selections.dataset_id` is valid for the associated file:
-- Add composite foreign key (requires file_dataset to exist first)
ALTER TABLE selection ADD CONSTRAINT fk_selection_file_dataset
    FOREIGN KEY (file_id, dataset_id) REFERENCES file_dataset(file_id, dataset_id);

-- 2024 Taxonomy View, after populating underlying table
-- see working with ebird taxonomies, aichat, deepseek, macbook
-- I think I still need this to display the options when someone creates a species
CREATE MATERIALIZED VIEW ebird_taxonomy_v2024 AS
SELECT
    id,
    species_code,
    primary_com_name,
    sci_name,
    bird_order, -- AS "order", order is reserved word in pgsql
    family
FROM ebird_taxonomy
WHERE taxonomy_version = '2024';
-- to help with plain text search on common name and scientific name
CREATE INDEX idx_ebird_name_search ON ebird_taxonomy_v2024
    USING gin(to_tsvector('english', primary_com_name || ' ' || sci_name));
CREATE INDEX ebird_taxonomy_species_code ON ebird_taxonomy_v2024(species_code);

-- Junction Table for Species to Dataset (many-to-many)
CREATE TABLE species_dataset (
    species_id VARCHAR(21) NOT NULL, -- NB: should be 12 !!!!!!!!!!!!!!!!!!!
    dataset_id VARCHAR(12) NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    PRIMARY KEY (species_id, dataset_id),
    FOREIGN KEY (species_id) REFERENCES species(id),
    FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
-- indexes for the junction table
CREATE INDEX idx_species_dataset_species ON species_dataset(species_id);
CREATE INDEX idx_species_dataset_dataset ON species_dataset(dataset_id);

-- Dataset level permissions
----------------------------
-- public switch in dataset table to remain unimplemented, default private
-- check that this is secure, it is simple though
-- Roles:
--   ADMIN role is granted to paid tier, create datasets, upload files and
--   USER role is granted to free tier, read only except can make new selections
-- Domain:Permission
--   Permission for a domain can be: Read, Manage, Delete
-- ?? not able to delete an access grant, can only modify
-- ?? this way we maintain some semblance of a history
-- datasets are soft deleted therefore can be reactivated
-- role grants will then still exist
-- paid can be implemented with permission in the kinde token
-- Permissions are assigned to roles, and roles are assigned to users. This makes it easier to manage permissions for groups of users.
-- The `user_id` field in the `access_grant` table allows you to override the default permissions for a specific user. For example, you might give a specific user ADMIN access to a dataset, even if their general role is USER. If the `user_id` is null, that means the role is applied across the dataset for all users with that role.
-- Access is controlled at the dataset level. A user's ability to perform actions (read, manage, delete) depends on the permissions granted to their role (or to them directly) for a *specific* dataset.
-- The `active` flag in `access_grant` provides a soft-delete mechanism. Instead of deleting a grant, you set `active = FALSE`. This preserves the history of access grants and is crucial for auditing and compliance.
-- Integrate the Kinde authentication system to retrieve user roles and IDs. The Kinde token should contain information about the user's role. The application then uses the `user_id` from the token when querying the `access_grant` table.
-- Application code will need to perform checks against the `access_grant` table to determine if a user has the necessary permissions to perform an action on a specific dataset. This involves a function or middleware that intercepts requests and verifies permissions (a sketch follows the tables below).

-- Important: Order matters
-- ENUMs to enforce valid roles/permissions
CREATE TYPE role_name AS ENUM ('ADMIN', 'USER', 'CURATOR');
CREATE TYPE permission_name AS ENUM ('READ', 'UPLOAD', 'DOWNLOAD', 'EDIT', 'DELETE');

-- Access Grant Table (Associates roles, users, datasets, and permissions)
CREATE TABLE access_grant (
    id VARCHAR(12) PRIMARY KEY, -- nanoid()
    dataset_id VARCHAR(12) NOT NULL, -- The dataset this grant applies to
    role role_name NOT NULL, -- The role this grant applies to
    permission permission_name NOT NULL, -- The permission being granted
    user_id VARCHAR(64), -- Optional. Grants permission to a specific user (overrides role). If NULL, the grant applies to *all* users with the specified role for the dataset.
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE, -- allows you to effectively "revoke" access without deleting the grant record, preserving history
    FOREIGN KEY (dataset_id) REFERENCES dataset(id),
    CONSTRAINT unique_access_grant UNIQUE (dataset_id, role, permission, user_id) -- prevents redundant or conflicting access grants
);
-- to prevent duplicate role-based permissions when user_id is NULL
CREATE UNIQUE INDEX idx_unique_role_permission
    ON access_grant(dataset_id, role, permission)
    WHERE user_id IS NULL;
CREATE INDEX idx_access_grant_dataset ON access_grant(dataset_id);
CREATE INDEX idx_access_grant_role ON access_grant(role);
CREATE INDEX idx_access_grant_permission ON access_grant(permission);
CREATE INDEX idx_access_grant_user ON access_grant(user_id);
CREATE INDEX idx_access_grant_active ON access_grant(active);

CREATE TABLE user_role (
    user_id VARCHAR(64) PRIMARY KEY, -- kinde user_id, user can have 1 role only
    role role_name NOT NULL DEFAULT 'USER',
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL -- kinde user id
);
CREATE INDEX idx_user_role ON user_role(user_id);
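As the notes above say, the permission check lives in application code. A minimal TypeScript sketch of such a check with drizzle against the access_grant table defined earlier; hasPermission, the db type, and the schema import path are assumptions, and role would come from the Kinde token or the user_role table.

import { and, eq, isNull, or } from "drizzle-orm";
import { accessGrant } from "./schema"; // the drizzle schema above (path assumed)

// True when an active grant covers the dataset/permission pair, either as a
// user-specific override or as a role-wide grant (user_id IS NULL).
export async function hasPermission(
  db: any, // drizzle instance; driver-specific type omitted in this sketch
  datasetId: string,
  userId: string,
  role: "ADMIN" | "USER" | "CURATOR",
  permission: "READ" | "UPLOAD" | "DOWNLOAD" | "EDIT" | "DELETE",
): Promise<boolean> {
  const rows = await db
    .select({ id: accessGrant.id })
    .from(accessGrant)
    .where(and(
      eq(accessGrant.datasetId, datasetId),
      eq(accessGrant.permission, permission),
      eq(accessGrant.active, true),
      or(
        eq(accessGrant.userId, userId), // user-specific override
        and(isNull(accessGrant.userId), eq(accessGrant.role, role)), // role-wide grant
      ),
    ))
    .limit(1);
  return rows.length > 0;
}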
-- Section below conflicts with sql2dbml (excl triggers, not in graph though)
--------------------------------------------------------------------------------
-- Function to update last_modified timestamp
-- CREATE OR REPLACE FUNCTION update_last_modified()
-- RETURNS TRIGGER AS $$
-- BEGIN
--     NEW.last_modified = CURRENT_TIMESTAMP;
--     RETURN NEW;
-- END;
-- $$ LANGUAGE plpgsql;

-- Trigger for file_size table
-- CREATE TRIGGER update_file_size_last_modified
-- BEFORE UPDATE ON file_size
-- FOR EACH ROW
-- EXECUTE FUNCTION update_last_modified();

-- Missing some triggers here; need a trigger for each of the following
-- (the per-table pattern is sketched below):
-- dataset
-- location
-- cluster
-- file
-- moth_metadata
-- file_metadata
-- file_dataset
-- selection
-- selection_metadata
-- species
-- call_type
-- label
-- label_subtype
-- access_grant
-- astro
-- file_size (implemented above)
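-- The per-table pattern, shown for dataset; each table in the list above
-- would get the same trigger under its own name. Kept commented out like the
-- function above, since triggers conflict with sql2dbml:
-- CREATE TRIGGER update_dataset_last_modified
-- BEFORE UPDATE ON dataset
-- FOR EACH ROW
-- EXECUTE FUNCTION update_last_modified();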
-- sql2dbml conflict ends
--------------------------------------------------------------------------------

-- dataset type enum
CREATE TYPE dataset_type AS ENUM ('organise', 'test', 'train');

-- Dataset Table
CREATE TABLE dataset (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    name VARCHAR(255) NOT NULL,
    public BOOLEAN DEFAULT FALSE, -- Whether dataset is publicly accessible
    description VARCHAR(255),
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    owner VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    type dataset_type NOT NULL DEFAULT 'organise'
);
CREATE INDEX idx_dataset_name ON dataset(name);
CREATE INDEX idx_dataset_owner ON dataset(owner);
CREATE INDEX idx_dataset_active ON dataset(active);
CREATE INDEX idx_dataset_public ON dataset(public);
SELECT NOW(); -- 2025-04-09 07:41:08.35534+00 enum, table, indexes

-- add dataset [x]
-- copy csv into duckdb using cli if needed to sanitise
-- COPY dataset FROM 'dataset.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY dataset TO 'dataset.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy dataset FROM 'dataset.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 07:43:45.115101+00

-- Locations Table
CREATE TABLE location (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
    name VARCHAR(140) NOT NULL,
    latitude DECIMAL(10, 7) CHECK (latitude BETWEEN -90.0 AND 90.0), -- -45.5027
    longitude DECIMAL(10, 7) CHECK (longitude BETWEEN -180.0 AND 180.0), -- 167.48406
    description VARCHAR(255), -- Limited to 255 characters for efficiency
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
SELECT NOW(); -- 2025-04-09 15:11:48.100717+00
CREATE INDEX idx_location_name ON location(name);
CREATE INDEX idx_location_dataset ON location(dataset_id); -- ??
CREATE INDEX idx_location_active ON location(active); -- ??
CREATE INDEX idx_location_dataset_active ON location(dataset_id, active);
SELECT NOW(); -- 2025-04-09 15:13:04.925533+00

CREATE TABLE cyclic_recording_pattern (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    record_s INTEGER NOT NULL,
    sleep_s INTEGER NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL
);
SELECT NOW(); -- 2025-04-09 15:13:45.288681+00
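-- Example of the lookup the composite index idx_location_dataset_active is
-- meant to serve (the dataset id is a hypothetical placeholder):
SELECT id, name, latitude, longitude
FROM location
WHERE dataset_id = 'exampleDsId0' AND active = TRUE
ORDER BY name;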
CREATE TABLE cluster (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
    location_id VARCHAR(12) NOT NULL, -- A cluster must have a location, as well as a dataset
    name VARCHAR(140) NOT NULL,
    description VARCHAR(255), -- Limited to 255 characters for efficiency
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    timezone_id VARCHAR(40), -- not required as may need to auto generate cluster, can't always ask or check
    cyclic_recording_pattern_id VARCHAR(12),
    FOREIGN KEY (dataset_id) REFERENCES dataset(id),
    FOREIGN KEY (location_id) REFERENCES location(id),
    FOREIGN KEY (cyclic_recording_pattern_id) REFERENCES cyclic_recording_pattern(id)
);
SELECT NOW(); -- 2025-04-09 15:15:26.042802+00
CREATE INDEX idx_cluster_dataset ON cluster(dataset_id);
CREATE INDEX idx_cluster_active ON cluster(active);
CREATE INDEX idx_cluster_dataset_active ON cluster(dataset_id, active);
CREATE INDEX idx_cluster_recording_pattern ON cluster(cyclic_recording_pattern_id);
SELECT NOW(); -- 2025-04-09 15:16:21.964596+00

-- values in my data are medium and medium-high
CREATE TYPE gain_level AS ENUM ('low', 'low-medium', 'medium', 'medium-high', 'high');
SELECT NOW(); -- 2025-04-09 15:17:52.262936+00

CREATE TABLE file (
    id VARCHAR(21) PRIMARY KEY, -- nanoid
    file_name VARCHAR(255) NOT NULL,
    path VARCHAR(255) NULL, -- optional hint for local access
    xxh64_hash VARCHAR(16) NOT NULL, -- hash of original file, e.g. cbe675a69a5fef1c
    location_id VARCHAR(12) NOT NULL, -- nanoid, from locations table
    timestamp_local TIMESTAMP WITH TIME ZONE NOT NULL, -- parsed from filename, adjust for daylight saving
    cluster_id VARCHAR(12), -- nanoid(12), optional if imported one by one
    duration DECIMAL(7, 3) NOT NULL CHECK (duration > 0), -- in seconds, allowing for millisecond precision (9999.999s)
    sample_rate INTEGER NOT NULL,
    description VARCHAR(255), -- Limited to 255 characters for efficiency
    storage_prefix VARCHAR(34) NOT NULL, -- prefix to compressed copy in object storage, nanoid(3) customAlphabet
    upload BOOLEAN DEFAULT TRUE,
    maybe_solar_night BOOLEAN, -- calculate with function on client
    maybe_civil_night BOOLEAN, -- calculate with function on client
    moon_phase DECIMAL(3,2) CHECK (moon_phase BETWEEN 0.00 AND 1.00), -- 0.00 to 1.00 (new moon to full moon)
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (location_id) REFERENCES location(id),
    FOREIGN KEY (cluster_id) REFERENCES cluster(id)
);
SELECT NOW(); -- 2025-04-09 15:39:18.281913+00
CREATE INDEX idx_file_location ON file(location_id);
CREATE INDEX idx_file_active ON file(active);
CREATE INDEX idx_file_timestamp_local ON file(timestamp_local);
CREATE INDEX idx_file_cluster ON file(cluster_id);
CREATE INDEX idx_file_maybe_solar_night ON file(maybe_solar_night);
CREATE INDEX idx_file_maybe_civil_night ON file(maybe_civil_night);
SELECT NOW(); -- 2025-04-09 15:40:07.82481+00

CREATE TABLE moth_metadata (
    file_id VARCHAR(21) PRIMARY KEY,
    timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
    recorder_id VARCHAR(16), -- e.g. 24F31901603710CD (16)
    gain gain_level NULL, -- a gain_level value or NULL
    battery_v DECIMAL(2, 1) CHECK (battery_v >= 0), -- for values from 0 to 9.9
    temp_c DECIMAL(3, 1), -- e.g., 24.2
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    FOREIGN KEY (file_id) REFERENCES file(id)
);
SELECT NOW(); -- 2025-04-09 15:40:51.607083+00

CREATE TABLE file_metadata (
    file_id VARCHAR(21) PRIMARY KEY,
    json JSONB, -- For noise levels and other file-level metadata
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    FOREIGN KEY (file_id) REFERENCES file(id)
);
SELECT NOW(); -- 2025-04-09 15:41:54.236904+00
CREATE INDEX idx_file_metadata_json ON file_metadata USING gin(json);
SELECT NOW(); -- 2025-04-09 15:42:39.410136+00
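-- Example of a containment query the GIN index on file_metadata.json can
-- serve; the key and value here are hypothetical, actual keys depend on what
-- the batch jobs write:
SELECT f.id, f.file_name
FROM file f
JOIN file_metadata fm ON fm.file_id = f.id
WHERE fm.json @> '{"noise_level": "high"}';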
-- Junction Table for Files to Dataset (many-to-many)
CREATE TABLE file_dataset (
    file_id VARCHAR(21) NOT NULL,
    dataset_id VARCHAR(12) NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    PRIMARY KEY (file_id, dataset_id),
    FOREIGN KEY (file_id) REFERENCES file(id),
    FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
SELECT NOW(); -- 2025-04-09 15:43:13.267234+00
-- indexes for the junction table
CREATE INDEX idx_file_dataset_file ON file_dataset(file_id);
CREATE INDEX idx_file_dataset_dataset ON file_dataset(dataset_id);
SELECT NOW(); -- 2025-04-09 15:43:41.62687+00

-- Selection Table
CREATE TABLE selection (
    id VARCHAR(21) PRIMARY KEY, -- nanoid
    file_id VARCHAR(21) NOT NULL, -- nanoid
    dataset_id VARCHAR(12) NOT NULL, -- nanoid, link to dataset
    start_time DECIMAL(7,3) NOT NULL, -- up to 9999.999 seconds
    end_time DECIMAL(7,3) NOT NULL, -- up to 9999.999 seconds
    freq_low DECIMAL(9,3) CHECK (freq_low < 300000), -- LOOK AT CHECK (tightening sketch after this section)
    freq_high DECIMAL(9,3) CHECK (freq_high < 300000), -- LOOK AT CHECK
    description VARCHAR(255), -- Limited to 255 characters for efficiency
    storage_prefix VARCHAR(34) NOT NULL, -- prefix to compressed copy in object storage, nanoid customAlphabet
    upload BOOLEAN DEFAULT TRUE,
    approved BOOLEAN DEFAULT FALSE, -- Approval status by dataset owner
    is_solar_night BOOLEAN, -- calculate with function on client; more accurate than the file-table value
    is_civil_night BOOLEAN, -- calculate with function on client; more accurate than the file-table value
    moon_phase DECIMAL(3,2) CHECK (moon_phase BETWEEN 0.00 AND 1.00), -- 0.00 to 1.00 (new moon to full moon)
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (file_id) REFERENCES file(id),
    FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
SELECT NOW(); -- 2025-04-09 15:44:10.434171+00
CREATE INDEX idx_selection_file ON selection(file_id);
CREATE INDEX idx_selection_dataset ON selection(dataset_id);
CREATE INDEX idx_selection_active ON selection(active);
CREATE INDEX idx_selection_approved ON selection(approved);
CREATE INDEX idx_selection_is_solar_night ON selection(is_solar_night);
CREATE INDEX idx_selection_is_civil_night ON selection(is_civil_night);
CREATE INDEX idx_selection_moon_phase ON selection(moon_phase);
SELECT NOW(); -- 2025-04-09 15:44:43.419907+00

CREATE TABLE selection_metadata (
    selection_id VARCHAR(21) PRIMARY KEY,
    json JSONB, -- for loudness, noise, and other selection-level metadata
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    FOREIGN KEY (selection_id) REFERENCES selection(id)
);
SELECT NOW(); -- 2025-04-09 15:45:28.370066+00
CREATE INDEX idx_selection_metadata_json ON selection_metadata USING gin(json);
SELECT NOW(); -- 2025-04-09 15:45:52.629629+00
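-- Possible tightening of the CHECKs flagged above (a sketch, not applied):
-- end must follow start, and freq_high must sit above freq_low when both are set.
-- ALTER TABLE selection ADD CONSTRAINT chk_selection_time_order
--     CHECK (end_time > start_time);
-- ALTER TABLE selection ADD CONSTRAINT chk_selection_freq_order
--     CHECK (freq_low IS NULL OR freq_high IS NULL OR freq_high > freq_low);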
-- eBird Taxonomy Table
-- will need to update INDEX too when introducing a new version
-- see working with ebird taxonomies, aichat, deepseek, macbook
-- see materialised view and index on it
CREATE TABLE ebird_taxonomy (
    id VARCHAR(12) PRIMARY KEY,
    taxonomy_version VARCHAR(4) NOT NULL,
    taxon_order INTEGER NOT NULL,
    category VARCHAR(15) NOT NULL,
    species_code VARCHAR(15) NOT NULL,
    taxon_concept_id VARCHAR(15),
    primary_com_name VARCHAR(100) NOT NULL,
    sci_name VARCHAR(100) NOT NULL,
    bird_order VARCHAR(30),
    family VARCHAR(100),
    species_group VARCHAR(100),
    report_as VARCHAR(15),
    valid_from DATE NOT NULL,
    valid_to DATE,
    UNIQUE (species_code, taxonomy_version)
);
SELECT NOW(); -- 2025-04-09 15:46:34.74185+00

-- Species Table (mutable), removed link to dataset_id and added owner field
CREATE TABLE species (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    label VARCHAR(100) NOT NULL, -- display label for the species
    ebird_code VARCHAR(12), -- link to ebird taxonomy
    taxonomy_version VARCHAR(4),
    description VARCHAR(255),
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    owner VARCHAR(64) NOT NULL,
    FOREIGN KEY (ebird_code, taxonomy_version) REFERENCES ebird_taxonomy(species_code, taxonomy_version)
);
SELECT NOW(); -- 2025-04-09 15:47:03.599141+00
CREATE INDEX idx_species_label ON species(label);
CREATE INDEX idx_species_ebird ON species(ebird_code);
SELECT NOW(); -- 2025-04-09 15:47:30.319107+00

-- Call Types Table (mutable)
CREATE TABLE call_type (
    id VARCHAR(12) PRIMARY KEY, -- nanoid(12)
    species_id VARCHAR(12) NOT NULL, -- link to parent species
    label VARCHAR(100) NOT NULL, -- display name like "male", "female", "duet"
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (species_id) REFERENCES species(id)
);
SELECT NOW(); -- 2025-04-09 15:47:57.081879+00
CREATE INDEX idx_call_type_species ON call_type(species_id);
CREATE INDEX idx_call_type_label ON call_type(label);
SELECT NOW(); -- 2025-04-09 15:48:24.381661+00

CREATE TABLE filter (
    id VARCHAR(12) PRIMARY KEY, -- nanoid
    name VARCHAR(140) NOT NULL,
    description VARCHAR(255),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    owner VARCHAR(64) NOT NULL,
    active BOOLEAN NOT NULL DEFAULT true
);
SELECT NOW(); -- 2025-04-09 15:49:27.103164+00

-- Label Table, many-to-1 relationship with selection
CREATE TABLE label (
    id VARCHAR(21) PRIMARY KEY, -- nanoid
    selection_id VARCHAR(21) NOT NULL, -- link to selection table
    species_id VARCHAR(12) NOT NULL, -- link to species table
    filter_id VARCHAR(12),
    certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN NOT NULL DEFAULT true,
    FOREIGN KEY (selection_id) REFERENCES selection(id),
    FOREIGN KEY (species_id) REFERENCES species(id),
    FOREIGN KEY (filter_id) REFERENCES filter(id)
);
SELECT NOW(); -- 2025-04-09 15:50:01.91386+00
CREATE INDEX idx_label_selection_id ON label(selection_id);
CREATE INDEX idx_label_species_id ON label(species_id);
SELECT NOW(); -- 2025-04-09 15:50:21.256066+0
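-- Example read path served by idx_label_selection_id: species labels and
-- certainty for one selection (the selection id is a hypothetical placeholder):
SELECT s.label AS species_label, l.certainty
FROM label l
JOIN species s ON s.id = l.species_id
WHERE l.selection_id = 'exampleSelectionId21'
  AND l.active = true;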
-- Label Sub-type Table (optional 1:1 relationship with label)
CREATE TABLE label_subtype (
    id VARCHAR(21) PRIMARY KEY, -- nanoid
    label_id VARCHAR(21) NOT NULL, -- link to parent label
    calltype_id VARCHAR(12) NOT NULL, -- link to call_type table
    filter_id VARCHAR(12),
    certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN NOT NULL DEFAULT true,
    FOREIGN KEY (label_id) REFERENCES label(id),
    FOREIGN KEY (calltype_id) REFERENCES call_type(id),
    FOREIGN KEY (filter_id) REFERENCES filter(id)
);
SELECT NOW(); -- 2025-04-09 15:51:03.626905+00

-- File Sizes Table (populated by batch job querying AWS)(mutable)
CREATE TABLE file_size (
    file_id VARCHAR(21) PRIMARY KEY, -- nanoid, matching the files table
    file_size BIGINT NOT NULL, -- size in bytes (using BIGINT just in case files > 2GB)
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (file_id) REFERENCES file(id)
);
SELECT NOW(); -- 2025-04-09 15:51:33.005948+00
CREATE INDEX idx_file_size_file ON file_size(file_id);
SELECT NOW(); -- 2025-04-09 15:52:01.304821+00

-- file_dataset must exist first
-- Referential Integrity for Selections
-- To ensure `selections.dataset_id` is valid for the associated file:
-- Add composite foreign key (requires file_dataset to exist first)
ALTER TABLE selection ADD CONSTRAINT fk_selection_file_dataset
FOREIGN KEY (file_id, dataset_id) REFERENCES file_dataset(file_id, dataset_id);
SELECT NOW(); -- 2025-04-09 15:52:42.744196+00

\COPY ebird_taxonomy FROM 'eBird_taxonomy_v2024.tsv' WITH (FORMAT csv, DELIMITER E'\t', HEADER, QUOTE '"', ESCAPE '\', NULL '');
SELECT NOW(); -- 2025-04-09 15:54:42.886859+00

-- 2024 Taxonomy View, after populating underlying table
-- see working with ebird taxonomies, aichat, deepseek, macbook
-- I need this to display the options when someone creates a species
CREATE MATERIALIZED VIEW ebird_taxonomy_v2024 AS
SELECT
    id,
    species_code,
    primary_com_name,
    sci_name,
    bird_order, -- AS "order", order is reserved word in pgsql
    family
FROM ebird_taxonomy
WHERE taxonomy_version = '2024';
SELECT NOW(); -- 2025-04-09 16:02:51.057481+00

-- to help with plain text search on common name and scientific name
CREATE INDEX idx_ebird_name_search ON ebird_taxonomy_v2024 USING gin(to_tsvector('english', primary_com_name || ' ' || sci_name));
CREATE INDEX ebird_taxonomy_species_code ON ebird_taxonomy_v2024(species_code);
SELECT NOW(); -- 2025-04-09 16:03:26.640118+00
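-- Example search against the view; the expression must match the index
-- expression exactly for the GIN index to be usable (search term illustrative):
SELECT species_code, primary_com_name, sci_name
FROM ebird_taxonomy_v2024
WHERE to_tsvector('english', primary_com_name || ' ' || sci_name)
      @@ plainto_tsquery('english', 'morepork')
LIMIT 10;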
-- Junction Table for Species to Dataset (many-to-many)
CREATE TABLE species_dataset (
    species_id VARCHAR(21) NOT NULL,
    dataset_id VARCHAR(12) NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    PRIMARY KEY (species_id, dataset_id),
    FOREIGN KEY (species_id) REFERENCES species(id),
    FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);
SELECT NOW(); -- 2025-04-09 16:04:00.064375+00
-- indexes for the junction table
CREATE INDEX idx_species_dataset_species ON species_dataset(species_id);
CREATE INDEX idx_species_dataset_dataset ON species_dataset(dataset_id);
SELECT NOW(); -- 2025-04-09 16:04:23.530128+00

CREATE TYPE role_name AS ENUM ('ADMIN', 'USER', 'CURATOR');
CREATE TYPE permission_name AS ENUM ('READ', 'UPLOAD', 'DOWNLOAD', 'EDIT', 'DELETE');
SELECT NOW(); -- 2025-04-09 16:05:18.144071+00

-- Access Grant Table (Associates roles, users, datasets, and permissions)
CREATE TABLE access_grant (
    id VARCHAR(12) PRIMARY KEY, -- nanoid()
    dataset_id VARCHAR(12) NOT NULL, -- The dataset this grant applies to
    role role_name NOT NULL, -- The role this grant applies to
    permission permission_name NOT NULL, -- The permission being granted
    user_id VARCHAR(64), -- Optional. Grants permission to a specific user (overrides role). If NULL, the grant applies to *all* users with the specified role for the dataset.
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL, -- kinde user id
    active BOOLEAN DEFAULT TRUE, -- allows you to effectively "revoke" access without deleting the grant record, preserving history
    FOREIGN KEY (dataset_id) REFERENCES dataset(id),
    CONSTRAINT unique_access_grant UNIQUE (dataset_id, role, permission, user_id) -- prevents redundant or conflicting access grants
);
SELECT NOW(); -- 2025-04-09 16:05:52.864153+00
-- to prevent duplicate role-based permissions when user_id is NULL
CREATE UNIQUE INDEX idx_unique_role_permission
ON access_grant(dataset_id, role, permission)
WHERE user_id IS NULL;
SELECT NOW(); -- 2025-04-09 16:06:26.067281+00
CREATE INDEX idx_access_grant_dataset ON access_grant(dataset_id);
CREATE INDEX idx_access_grant_role ON access_grant(role);
CREATE INDEX idx_access_grant_permission ON access_grant(permission);
CREATE INDEX idx_access_grant_user ON access_grant(user_id);
CREATE INDEX idx_access_grant_active ON access_grant(active);
SELECT NOW(); -- 2025-04-09 16:06:55.141239+00
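-- What idx_unique_role_permission adds over unique_access_grant: the UNIQUE
-- constraint treats NULLs as distinct, so without the partial index a second
-- role-wide grant would be accepted. Hypothetical illustration (ids made up):
-- INSERT INTO access_grant (id, dataset_id, role, permission, user_id, created_by, modified_by)
-- VALUES ('exampleId001', 'exampleDsId0', 'USER', 'READ', NULL, 'kp_example', 'kp_example'); -- succeeds
-- INSERT INTO access_grant (id, dataset_id, role, permission, user_id, created_by, modified_by)
-- VALUES ('exampleId002', 'exampleDsId0', 'USER', 'READ', NULL, 'kp_example', 'kp_example'); -- rejected by the partial index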
CREATE TABLE user_role (
    user_id VARCHAR(64) PRIMARY KEY, -- kinde user_id, user can have 1 role only
    role role_name NOT NULL DEFAULT 'USER',
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by VARCHAR(64) NOT NULL, -- kinde user id
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    modified_by VARCHAR(64) NOT NULL -- kinde user id
);
SELECT NOW(); -- 2025-04-09 16:07:16.366116+00
CREATE INDEX idx_user_role ON user_role(user_id);
SELECT NOW(); -- 2025-04-09 16:08:04.699188+00

-- add location [x]
-- copy csv into duckdb using cli if needed to sanitise
-- COPY location FROM 'location.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY location TO 'location.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy location FROM 'location.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 16:12:13.798421+00

-- add cyclic_recording_pattern [x]
-- copy csv into duckdb using cli if needed to sanitise
-- COPY cyclic_recording_pattern FROM 'cyclic_recording_pattern.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY cyclic_recording_pattern TO 'cyclic_recording_pattern.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy cyclic_recording_pattern FROM 'cyclic_recording_pattern.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 16:12:43.958352+00

-- add cluster [x]
-- copy csv into duckdb using cli if needed to sanitise
-- COPY cluster FROM 'cluster.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY cluster TO 'cluster.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy cluster FROM 'cluster.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 16:13:46.196433+00

-- add file
-- copy csv into duckdb using cli if needed to sanitise
-- COPY file FROM 'file.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY file TO 'file.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy file FROM 'file3.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 16:49:21.294746+00

-- add moth_metadata
-- copy csv into duckdb using cli if needed to sanitise
-- COPY moth_metadata FROM 'moth_metadata.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY moth_metadata TO 'moth_metadata.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy moth_metadata FROM 'moth_metadata.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:18:36.966633+00

-- add file_dataset
-- copy csv into duckdb using cli if needed to sanitise
-- COPY file_dataset FROM 'file_dataset.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY file_dataset TO 'file_dataset.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy file_dataset FROM 'file_dataset.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:37:03.178435+00

-- add species
-- copy csv into duckdb using cli if needed to sanitise
-- COPY species FROM 'species.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER);
-- COPY species TO 'species.tsv' (FORMAT CSV, DELIMITER E'\t', HEADER, NULL '');
-- upload to neon
\copy species FROM 'species.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:52:35.538073+00

-- add call_type
-- upload to neon
\copy call_type FROM 'call_type.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:53:24.384375+00

-- add filter data
-- upload to neon
\copy filter FROM 'filter.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:53:58.180513+00

-- file_metadata_avianz.tsv
\copy file_metadata FROM 'file_metadata_avianz.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:54:56.536164+00
\copy selection FROM 'selection_avianz.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:55:59.247693+00
\copy selection_metadata FROM 'selection_metadata_avianz.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:56:32.451374+00
\copy label FROM 'label_avianz.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:57:17.600579+00
\copy label_subtype FROM 'label_subtype_avianz.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:57:57.270427+00
\copy species_dataset FROM 'species_dataset.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 17:58:40.500378+00

-- selections data for pomona 20230418
\copy selection FROM 'selection_pomona_20230418.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 18:02:25.360178+00
\copy selection_metadata FROM 'selection_metadata_pomona_20230418.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 18:03:38.790581+00
\copy label FROM 'label_pomona_20230418.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 18:08:04.202488+00
\copy label_subtype FROM 'label_subtype_pomona_20230418.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 18:10:55.322432+00

-- selections data for pomona 20250304
\copy selection FROM 'selection_pomona_20250304.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 18:14:26.407255+00
\copy selection_metadata FROM 'selection_metadata_pomona_20250304.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 18:15:08.567327+00
\copy label FROM 'label_pomona_20250304.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
SELECT NOW(); -- 2025-04-09 18:18:05.112561+00
\copy label_subtype FROM 'label_subtype_pomona_20250304.tsv' WITH DELIMITER E'\t' CSV HEADER NULL AS ''
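-- A quick post-load sanity check could compare row counts against the source
-- TSVs (a sketch; expected counts were not recorded here):
SELECT 'file' AS tbl, count(*) AS n FROM file
UNION ALL SELECT 'selection', count(*) FROM selection
UNION ALL SELECT 'label', count(*) FROM label
UNION ALL SELECT 'label_subtype', count(*) FROM label_subtype;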
SELECT NOW(); -- 2025-04-09 18:21:01.12717+00

INSERT INTO access_grant (id, dataset_id, role, permission, user_id, created_at, created_by, last_modified, modified_by, active) VALUES
('5HCmMIimQMcD', 'Yx0oNUDmP5ch', 'ADMIN', 'READ', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('_Z-XdwNTvDFg', 'Yx0oNUDmP5ch', 'ADMIN', 'UPLOAD', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('CWZyEI1Oa1i', 'Yx0oNUDmP5ch', 'ADMIN', 'DOWNLOAD', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('-Ohke6B7z6Ht', 'Yx0oNUDmP5ch', 'ADMIN', 'EDIT', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('5ske4RlY2xjJ', 'Yx0oNUDmP5ch', 'ADMIN', 'DELETE', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('zJjp5lmZid0e', 'gljgxDbfasva', 'ADMIN', 'READ', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('fEJysuxie266', 'gljgxDbfasva', 'ADMIN', 'UPLOAD', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('hz-hnQ3AScnV', 'gljgxDbfasva', 'ADMIN', 'DOWNLOAD', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('ob4WillMpLnV', 'gljgxDbfasva', 'ADMIN', 'EDIT', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true),
('qd5ABiggu2np', 'gljgxDbfasva', 'ADMIN', 'DELETE', NULL, '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', true);
SELECT NOW(); -- 2025-04-09 18:22:12.620392+00

INSERT INTO user_role (user_id, role, created_at, created_by, last_modified, modified_by) VALUES
('kp_8663c3b3d4654734874b17d846bbe52d', 'ADMIN', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d', '2025-04-05T01:56:14.785+00:00', 'kp_8663c3b3d4654734874b17d846bbe52d');
SELECT NOW(); -- 2025-04-09 18:22:38.804412+00
# ME

npm create cloudflare@latest skraak -- --template=cloudflare/templates/vite-react-template // for vite+react+cloudflare template
pijul init, add, record
claude
volta pin node@22
npm i drizzle-orm @neondatabase/serverless
npm i -D drizzle-kit postgres dotenv tsx
copied in .dev.vars with neon connection string
copied db/ in, including converted db/schema.ts
added api/datasets route
added raw get to App.tsx to get datasets
cleaned out all unused code and stuff including svg's
npm run deploy again to check it works on cloudflare
fixed errors and deployed ok
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Commands

- Build: `npm run build` (TypeScript build + Vite build)
- Dev server: `npm run dev` (Vite dev server with HMR)
- Lint: `npm run lint` (ESLint)
- Type check: `tsc -b` (TypeScript build checking)
- Deploy: `npm run deploy` (builds and deploys to Cloudflare Workers)
- Preview: `npm run preview` (preview production build locally)

## Code Style Guidelines

- TypeScript strict mode with noUnusedLocals, noUnusedParameters flags
- React functional components with hooks (follow react-hooks plugin rules)
- Project structure: src/react-app (frontend), src/worker (Cloudflare Workers backend)
- Import order: React/external libraries first, then internal modules
- Naming: PascalCase for components, camelCase for functions/variables
- Error handling: use try/catch blocks for async operations
- Use Hono for backend API routes with typed request/response interfaces
- Avoid any type; use explicit type annotations for function parameters/returns
- Follow ESLint recommended configurations for JS and TypeScript