now handle .data files

quietlight
Mar 12, 2026, 2:26 AM
UALO24UKLL2VLKCC5JKMGJRJD2MKE227NJMCFQRXRRFYXJDUD2UAC

Dependencies

  • [2] KUSOV4SZ mods to manifest, predict, train, on linux
  • [3] FCL6FKHM initial commit after ripping ML out of Skraak, it does compile on Mac Julia 1.10
  • [4] 2LYEPJIV changes that were left on linux

Change contents

  • replacement in src/Predict.jl at line 7
    [3.10324][3.10324:10415]()
    DSP, Images, ThreadsX, Dates, DataFrames, CSV, Flux, CUDA, Metalhead, JLD2, FLAC, Glob
    [3.10324]
    [3.10415]
    DSP, Images, ThreadsX, Dates, DataFrames, CSV, Flux, CUDA, Metalhead, JLD2, JSON, FLAC, Glob, PerceptualColourMaps
  • edit in src/Predict.jl at line 14
    [3.10607]
    [3.10607]
    return signal, freq
    end
    function _resample_to_8000hz(signal, freq)
    signal = DSP.resample(signal, 8000.0f0 / freq; dims=1)
    freq = 8000
  • edit in src/Predict.jl at line 104
    [3.12729]
    [3.12729]
    end
    end
    function predict(folders::Vector{String}, model::String, labels::Dict)
    model = load_model_pred(model) |> device
    @info "Folders: $folders"
    for folder in folders
    @info "Working on: $folder"
    predict_folder(folder, model, labels)
  • replacement in src/Predict.jl at line 135
    [3.13333][3.13333:13380]()
    function predict_folder(folder::String, model)
    [3.13333]
    [3.13380]
    function predict_folder(folder::String, model, labels::Dict=Dict())
    data_files = Glob.glob("$folder/*.data")
    if !isempty(data_files) && !isempty(labels)
    predict_avianz_folder(data_files, model, folder, labels)
    return
    end
  • edit in src/Predict.jl at line 315
    [3.19053]
    [3.19053]
    ############### AviaNZ .data file support ################
  • edit in src/Predict.jl at line 318
    [3.19054]
    [3.19054]
    """
        predict_avianz_folder(data_files, model, folder, labels)

    Run `predict_avianz_file` on every path in `data_files`, in the order given.

    `model` and `labels` are forwarded unchanged to `predict_avianz_file`;
    `folder` is only used in the log message.
    """
    function predict_avianz_folder(data_files::Vector{String}, model, folder::String, labels::Dict)
        @info "$(length(data_files)) .data files in $folder"
        foreach(data_files) do path
            predict_avianz_file(path, model, labels)
        end
        return nothing
    end
    """
        predict_avianz_file(data_file::String, model, labels::Dict)

    Re-label the "Kiwi" segments of one AviaNZ `.data` annotation file in place.

    The audio file is looked up at the path of `data_file` minus its last five
    characters (the `.data` suffix). If it does not exist, a warning is logged and
    nothing is changed. Otherwise the audio is loaded (and resampled when its rate
    is not 8000), every label whose `"species"` is `"Kiwi"` gets an image built from
    its segment's sample range, and the images are run through `model`.

    # Arguments
    - `data_file`: path to the `.data` JSON file; it is overwritten with the result.
    - `model`: callable applied to each batch; its output is passed to `Flux.onecold`.
    - `labels`: lookup from the `Flux.onecold` index to the predicted label string.

    # Effects
    For each Kiwi label, a prediction of `"Don't Know"` sets `"species"` to
    `"Don't Know"` and removes `"calltype"`; any other prediction is stored under
    `"calltype"`. The JSON is then written back to `data_file` with indent 2.
    Returns `nothing`.
    """
    function predict_avianz_file(data_file::String, model, labels::Dict)
        @info "Processing: $data_file"
        data = JSON.parsefile(data_file)

        # Strip the ".data" suffix by character count. Byte-range indexing
        # (`data_file[1:end-5]`) throws a StringIndexError when the character just
        # before the suffix is multi-byte (e.g. "é.data"); `chop` does not.
        wav_file = String(chop(data_file; tail=5))
        if !isfile(wav_file)
            @warn "Audio file not found: $wav_file, skipping"
            return
        end

        signal, freq = load_audio_file(wav_file)
        if freq != 8000
            signal, freq = _resample_to_8000hz(signal, freq)
        end
        f = convert(Int, freq)

        # Collect images and references to Kiwi segment labels
        raw_images = []
        kiwi_labels = [] # references to the label dicts to update (mutated below)

        # Element 1 is metadata in AviaNZ format; the remaining elements are segments.
        for segment in Iterators.drop(data, 1)
            # Each segment is [start_time, end_time, low_freq, high_freq, [labels...]]
            for label in segment[5]
                # AbstractDict rather than Dict so any mapping type is accepted.
                is_kiwi = label isa AbstractDict && get(label, "species", nothing) == "Kiwi"
                is_kiwi || continue

                start_time = segment[1]
                end_time = segment[2]
                # Convert seconds to 1-based sample indices, clamped to the signal.
                start_sample = max(1, round(Int, start_time * f) + 1)
                end_sample = min(size(signal, 1), round(Int, end_time * f))
                if end_sample <= start_sample
                    @warn "Empty segment at $start_time-$end_time in $data_file, skipping"
                    continue
                end

                sample = signal[start_sample:end_sample, 1]
                push!(raw_images, _get_image_from_sample(sample, f))
                push!(kiwi_labels, label)
            end
        end

        if isempty(raw_images)
            @info "No Kiwi segments in $data_file"
            return
        end
        @info "$(length(raw_images)) Kiwi segments in $data_file"

        # Create DataLoader and predict; predictions stay in the order of `raw_images`.
        loader = avianz_loader(raw_images, length(raw_images))
        preds = []
        for x in loader
            append!(preds, Flux.onecold(model(x)))
        end

        # Update species/calltype in each Kiwi segment's label. The dicts are the
        # same objects held inside `data`, so the changes show up in the output.
        for (i, label) in enumerate(kiwi_labels)
            pred = labels[preds[i]]
            if pred == "Don't Know"
                label["species"] = "Don't Know"
                delete!(label, "calltype")
            else
                label["calltype"] = pred
            end
        end

        # Write modified JSON back to .data file
        open(data_file, "w") do io
            JSON.print(io, data, 2)
        end
        @info "Updated $data_file"
        return nothing
    end
    """
        avianz_loader(raw_images::Vector, n_samples::Int)

    Build a non-shuffling `Flux.DataLoader` over `raw_images` with batch size
    `n_samples`.

    Each image is permuted with `(3, 1, 2)`, viewed as `RGB`, converted to Float32
    channels, and permuted with `(3, 2, 1)`; the results are concatenated along
    dimension 4. When the `device` in scope equals `gpu`, the loader is wrapped in
    a `CuIterator`.
    """
    function avianz_loader(raw_images::Vector, n_samples::Int)
        # Same conversion as a pipe chain, spelled out one step at a time.
        function prepare(img)
            as_rgb = Images.RGB.(colorview(RGB, permutedims(img, (3, 1, 2))))
            channels = collect(channelview(float32.(as_rgb)))
            return permutedims(channels, (3, 2, 1))
        end

        processed = map(prepare, raw_images)
        images = cat(processed..., dims=4)
        loader = Flux.DataLoader(images, batchsize=n_samples, shuffle=false)
        if device == gpu
            loader = CuIterator(loader)
        end
        return loader
    end
  • edit in Project.toml at line 17
    [3.28103]
    [3.28103]
    JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
  • replacement in Manifest.toml at line 3
    [3.73][2.1468:1493]()
    julia_version = "1.10.6"
    [3.73]
    [3.98]
    julia_version = "1.10.11"
  • replacement in Manifest.toml at line 5
    [3.122][2.1494:1552]()
    project_hash = "fb2ea7cb2e8eacb3a2ce0f311bec38248363d294"
    [3.122]
    [3.180]
    project_hash = "5cdc2ceafadad9064b331599e55f67e4277793b6"
  • replacement in Manifest.toml at line 1017
    [3.36894][2.2250:2271]()
    version = "2.28.2+1"
    [3.36894]
    [3.36915]
    version = "2.28.1010+0"
  • replacement in Manifest.toml at line 1058
    [3.38207][2.2272:2294]()
    version = "2023.1.10"
    [3.38207]
    [3.38230]
    version = "2025.12.2"
  • replacement in Manifest.toml at line 1152
    [3.41255][2.2613:2634]()
    version = "0.3.23+4"
    [3.41255]
    [3.41276]
    version = "0.3.23+5"
  • replacement in Manifest.toml at line 1175
    [3.42015][3.42015:42035]()
    version = "0.8.1+2"
    [3.42015]
    [3.42035]
    version = "0.8.5+0"
  • replacement in Manifest.toml at line 1791
    [3.62765][3.62765:62786]()
    version = "17.4.0+2"
    [3.62765]
    version = "17.6.1+0"