mkpath(f)
outfile = "$f/$name"
sample = signal[Int(st):Int(en)]
WAV.wavwrite(sample, "$outfile.wav", Fs = Int(freq))
image = get_image_from_sample(sample, freq)
PNGFiles.save("$outfile.png", image)
end
print(".")
end
print(".")
end
=#
#make_clips_clusters(glob("*/"), "preds3_opensoundscape-kiwi-1.2_2025-07-09.csv")
#make_clips_clusters(glob("*/"), "predsST_opensoundscape-kiwi-1.5_2025-07-09.csv")
# Valid file names follow this pattern: preds3_opensoundscape-kiwi-1.2_2025-07-09.csv
"""
    make_clips_clusters(clusters::Vector{String}, csv_name::String) -> Nothing

Run `make_clips_generic` for every column of every predictions file named
`csv_name` found one directory level below each entry of `clusters`.

`csv_name` must have three underscore-separated parts, for example
`preds3_opensoundscape-kiwi-1.2_2025-07-09.csv`:

1. `preds` followed by a sensitivity tag (`3`, `ST`, ...),
2. the model name, which must contain `opensoundscape`,
3. a 14-character `yyyy-mm-dd.csv` suffix.

# Arguments
- `clusters`: directories to process; each one is entered with `cd` while its
  prediction files are handled, and the previous working directory is restored
  afterwards.
- `csv_name`: file name (not a path) of the predictions CSV to look for.

# Throws
- `ArgumentError` if `csv_name` does not follow the pattern above.

Errors raised by `make_clips_generic` are logged with `@info` and do not stop
the run.
"""
function make_clips_clusters(
    clusters::Vector{String},
    csv_name::String
)::Nothing
    parts = split(csv_name, "_")
    length(parts) == 3 || throw(ArgumentError(
        "expected 3 underscore-separated parts in csv_name, got $(length(parts)): $csv_name",
    ))
    startswith(parts[1], "preds") || throw(ArgumentError(
        "first part of csv_name must start with \"preds\": $csv_name",
    ))
    occursin("opensoundscape", parts[2]) || throw(ArgumentError(
        "second part of csv_name must contain \"opensoundscape\": $csv_name",
    ))
    (length(parts[3]) == 14 && endswith(parts[3], ".csv")) || throw(ArgumentError(
        "third part of csv_name must look like yyyy-mm-dd.csv: $csv_name",
    ))

    # make_clips_generic requires plain `String`s, so materialise the SubString.
    model = String(parts[2])
    sensitivity = replace(String(parts[1]), "preds" => "")

    for cluster in clusters
        # The do-block form restores the working directory even if an error
        # escapes or `cluster` is more than one level deep.
        cd(cluster) do
            for pred in glob("*/$csv_name")
                # NOTE(review): every column header is tried as a label, including
                # any non-label columns in the CSV; failures for those are only
                # logged below. Confirm whether they should be filtered out here.
                for ebird in names(CSV.read(pred, DataFrame))
                    try
                        make_clips_generic(pred, ebird, model, sensitivity, true)
                    catch e
                        @info e
                    end
                end
            end
        end
    end
    return nothing
end
function make_clips_generic(
preds_path::String,
label::String, ##column header, ie ebird or "Kiwi"
model_name::String,
sensitivity::String
unique_file_names = true,
)
# Assumes function run from Kahurangi Data
#pth = replace(preds_path, "preds-2024-10-21.csv" => "")
pth0 = split(preds_path, "/")
length(pth0) > 1 ? (pth = joinpath(pth0[1:end-1]) * "/") : pth = ""
# Return `df` unchanged when the column named by `label` contains at least one
# value equal to 1; otherwise raise an error naming the label and predictions file.
# The column is looked up by the `label` argument (not a column literally called
# "label"), and the failure is thrown rather than logged so that the declared
# `DataFrame` return type is never asked to convert `nothing`.
function assert_detections_present_(df::DataFrame, label::String, preds_path)::DataFrame
    if !(1 in levels(df[!, label]))
        error("No detections for label = $label at $preds_path")
    end
    return df
end
# Load and group data frame by file
gdf =
#! format: off
DataFrames.DataFrame(CSV.File(preds_path)) |>
x -> assert_not_empty(x, preds_path) |>
x -> assert_detections_present_(x, label, preds_path) |>
x -> filter_positives!(x, label) |>
group_by_file!
#! format: on
# Make clip and spectrogram
for (k, v) in pairs(gdf)
#file_name = chop(v.file[1], head = 2, tail = 4)
file_name, extension = path_to_file_string(v.file[1])
#@info (file_name, extension)
start_times = v.start_time |> x ->
convert(Vector{Float64}, x) |>
#dropmissing(x, disallowmissing = true) |> ######CHECK used to make cobb work. not working anymore, but convert works fine. This happens because the col type of dataframe is Float64? even though no missings, seems to ony happen with doc recorders
sort
detections = cluster_detections(start_times)
isempty(detections) && continue
signal, freq = WAV.wavread("$pth$(file_name).$(extension)")
if size(signal, 2) == 2
signal = (signal[:, 1] + signal[:, 2]) / 2
end
if freq > 8000
signal, freq = resample_to_8000hz(signal, freq)
end
freq = freq |> Float32
length_signal = length(signal)
for detection in detections
st, en = calculate_clip_start_end(detection, freq, length_signal)
if unique_file_names == true
name = "$file_name-$(Int(floor(st/freq)))-$(Int(ceil(en/freq)))" #leave off path, not necesaray if unique file names
else
p = replace(pth, "/" => "--") #replace / with -- including trailing /
name = "$p$file_name-$(Int(floor(st/freq)))-$(Int(ceil(en/freq)))"
end
f = "Clips_$(model_name)_$(today())"