recent mods, catchup

quietlight
Jul 21, 2025, 4:06 AM
UHQJPD6UDTPZ74GUOP4EPZKGZEKVIQBP4S6GF4O6VMO7UMRPQVJAC

Dependencies

  • [2] S3GASDBF catch up, changes in clips, parse
  • [3] BONWFSH4 update on linux and work on clips
  • [4] X54TLSYE catch up commit, train now working with julia 1.10.5 but not 1.11
  • [5] UTM4NN57 changes that were left on linux
  • [6] JYCKLP2E changes to clips and predict
  • [7] ZI5JRTFF small change to clips
  • [8] 2UBDFCJH new files tracked
  • [9] SMODB47P mods to clips to truncate freq to max 16000
  • [10] ROFI4OLA catch up, working on colour images
  • [11] NMQCXLNG catch up commit, about to do some work on prediction loop
  • [12] AQWXGGE3 mod to clips.jl
  • [13] QA2TJZRA ripped ML out into the SkraakML repo, compiles quicker now
  • [14] QRBFIGF2 mods to clips, file meta data, oe lieux
  • [15] MMG2PLXK new function to make clips of downloaded bisd calls
  • [16] 7KO4BAOG Tidy up

Change contents

  • replacement in src/FileMetaData.jl at line 8
    [4.4718][4.4718:4742](),[4.4742][4.2285:2320]()
    using Glob, Skraak, CSV
    folders=Glob.glob("*/2024-10-18/")
    [4.4718]
    [4.8]
    using Glob, Skraak, CSV, DataFrames
    folders=Glob.glob("*/2025-05-18/")
  • replacement in src/FileMetaData.jl at line 14
    [3.7517][4.31:128](),[4.4855][4.31:128]()
    CSV.write("/media/david/Pomona-4/Pomona/pomona_files_20241126_new.csv", df; append=true)
    [3.7517]
    [4.1805]
    CSV.write("/media/david/Pomona-4/Pomona/pomona_files_20250522_new.csv", df; append=true)
    #CSV.write("/media/david/Pomona-4/Pomona/pomona_files_20241126_new.csv", df; append=true)
  • edit in src/FileMetaData.jl at line 72
    [4.6543]
    [4.6543]
    xxh64 = String[],
  • edit in src/FileMetaData.jl at line 208
    [3.7674]
    [4.11595]
    xxh64 = readchomp(`xxh-hash $file`) #run(`xxh-hash $file`)
  • edit in src/FileMetaData.jl at line 230
    [4.12152]
    [4.12152]
    xxh64,
  • edit in src/Clips.jl at line 102
    [4.15744]
    [4.15744]
    end
    #=
    using Opus, PNGFiles, Skraak
    function image_from_opus(file)
    signal, freq = Opus.load(file)
    if size(signal, 2) == 2
    signal = (signal[:, 1] + signal[:, 2]) / 2
    end
    if freq > 8000
    signal, freq = Skraak.resample_to_8000hz(signal, freq)
    end
    freq = freq |> Float32
    sample = signal[:]
    image = Skraak.get_image_from_sample(sample, freq)
    PNGFiles.save("$(file).png", image)
    print(".")
  • edit in src/Clips.jl at line 120
    [4.15748]
    [4.15748]
    =#
  • replacement in src/Clips.jl at line 146
    [4.16528][4.16528:16543]()
    # assumes kiwi
    [4.16528]
    [4.16543]
    ##################################################################
  • replacement in src/Clips.jl at line 149
    [4.16641][4.16641:16684]()
    filter!(row -> row.label == label, df)
    [4.16641]
    [4.16684]
    filter!(row -> row.label == 1, df)
  • edit in src/Clips.jl at line 306
    [4.21058]
    [4.21058]
    label = "Rowi"
  • replacement in src/Clips.jl at line 308
    [4.21062][4.21062:21108]()
    function move_clips_to_folders(df::DataFrame)
    [4.21062]
    [4.5796]
    function move_clips_to_folders(df::DataFrame, label::String)
  • edit in src/Clips.jl at line 315
    [4.21505]
    [4.21505]
    call_map = Dict(1 => "Duet", 2 => "Female - Solo", 3 => "Male - Solo")
  • replacement in src/Clips.jl at line 318
    [4.21555][4.21555:21628]()
    dst = "$(row.label)/$(row.file)"
    mkpath("$(row.label)/")
    [4.21555]
    [4.21628]
    if row.label == 4
    dst = "Don't Know/$(row.file)"
    mkpath("Don't Know/")
    else
    dst = "$label/$(call_map[row.label])/$(row.file)"
    mkpath("$label/$(call_map[row.label])")
    end
    #dst = "$(row.label)/$(row.file)"
    #mkpath("$(row.label)/")
  • replacement in src/Clips.jl at line 331
    [4.300][4.21738:21766](),[4.21738][4.21738:21766]()
    if isdir(video)
    [4.300]
    [4.21766]
    #=if isdir(video)
  • replacement in src/Clips.jl at line 337
    [4.21970][4.21970:21986]()
    end
    [4.21970]
    [4.21986]
    end=#
  • edit in src/Clips.jl at line 379
    [4.1557]
    [4.1557]
    predictions=[a;b;c]
    Flora
    a=glob("../Misc-1/Friends of Flora (1)/Flora */*/preds3_Kahurangi_1-5_2025-02-14.csv")
    b=glob("../Misc-1/Friends of Flora (1)/Flora */*/*/preds3_Kahurangi_1-5_2025-02-1*.csv")
    c=glob("../Misc-1/Friends of Flora (1)/Flora */*/*/*/preds3_Kahurangi_1-5_2025-02-1*.csv")
  • replacement in src/Clips.jl at line 388
    [4.1658][4.1658:1675]()
    for file in list
    [4.1658]
    [4.1675]
    for file in predictions
  • replacement in src/Clips.jl at line 398
    [4.2501][2.85:136]()
    make_clips_generic(file, 1, "Kahurangi3_ST", true)
    [4.2501]
    [4.2526]
    make_clips_generic(file, 1, "MOK_202505-_K1-5_T3", true) #####false
  • edit in src/Clips.jl at line 404
    [4.2150]
    [4.1843]
    # Note on the `false` option above: run from a directory close to the WAVs to minimise the length of the file name; nothing is saved if `..` appears in the file name.
    #=
  • edit in src/Clips.jl at line 464
    [4.843]
    [4.4523]
    mkpath(f)
    outfile = "$f/$name"
    sample = signal[Int(st):Int(en)]
    WAV.wavwrite(sample, "$outfile.wav", Fs = Int(freq))
    image = get_image_from_sample(sample, freq)
    PNGFiles.save("$outfile.png", image)
    end
    print(".")
    end
    print(".")
    end
    =#
    #make_clips_clusters(glob("*/"), "preds3_opensoundscape-kiwi-1.2_2025-07-09.csv")
    #make_clips_clusters(glob("*/"), "predsST_opensoundscape-kiwi-1.5_2025-07-09.csv")
    # Valid file names follow this pattern: preds3_opensoundscape-kiwi-1.2_2025-07-09.csv
    """
        make_clips_clusters(clusters, csv_name)

    Run `make_clips_generic` for every prediction CSV named `csv_name` found one
    directory level below each folder in `clusters`, once per column header of that CSV.

    `csv_name` must follow the pattern `preds<sensitivity>_<model>_<yyyy-mm-dd>.csv`,
    e.g. `preds3_opensoundscape-kiwi-1.2_2025-07-09.csv`. The middle part is passed on
    as the model name and the text after `preds` as the sensitivity.

    # Arguments
    - `clusters`: directories to visit, e.g. the result of `glob("*/")`.
    - `csv_name`: file name (not a path) of the prediction CSV to look for.

    # Returns
    `nothing`.

    # Throws
    - `ArgumentError` if `csv_name` does not match the expected pattern.
    """
    function make_clips_clusters(
        clusters::Vector{String},
        csv_name::String
    )::Nothing
        parts = split(csv_name, "_")
        length(parts) == 3 || throw(
            ArgumentError("expected 3 '_'-separated parts in \"$csv_name\", got $(length(parts))"),
        )
        occursin("preds", parts[1]) || throw(
            ArgumentError("first part of \"$csv_name\" must contain \"preds\""),
        )
        occursin("opensoundscape", parts[2]) || throw(
            ArgumentError("second part of \"$csv_name\" must contain \"opensoundscape\""),
        )
        # "yyyy-mm-dd.csv" is exactly 14 characters long.
        (length(parts[3]) == 14 && endswith(parts[3], ".csv")) || throw(
            ArgumentError("third part of \"$csv_name\" must look like \"yyyy-mm-dd.csv\""),
        )

        # make_clips_generic takes `model_name::String`, so pass the whole middle part
        # as one String rather than a vector of pieces.
        model = String(parts[2])
        sensitivity = replace(first(parts), "preds" => "")

        for cluster in clusters
            # The do-block form of `cd` restores the previous working directory even
            # when the body throws, and also for cluster paths more than one level deep.
            cd(cluster) do
                for pred in glob("*/$csv_name")
                    columns = names(CSV.read(pred, DataFrame))
                    # NOTE(review): this iterates every column header, not only label
                    # columns; non-label columns presumably fail inside
                    # make_clips_generic and are skipped by the catch below - confirm.
                    for column in columns
                        try
                            make_clips_generic(pred, column, model, sensitivity, true)
                        catch e
                            # Best effort per column, but never swallow Ctrl-C.
                            e isa InterruptException && rethrow()
                            @info "make_clips_generic failed; skipping column" pred column exception = e
                        end
                    end
                end
            end
        end
        return nothing
    end
    function make_clips_generic(
    preds_path::String,
    label::String, ##column header, ie ebird or "Kiwi"
    model_name::String,
    sensitivity::String
    unique_file_names = true,
    )
    # Assumes function run from Kahurangi Data
    #pth = replace(preds_path, "preds-2024-10-21.csv" => "")
    pth0 = split(preds_path, "/")
    length(pth0) > 1 ? (pth = joinpath(pth0[1:end-1]) * "/") : pth = ""
    function assert_detections_present_(df::DataFrame, label::String, preds_path)::DataFrame
    1 in levels(df.label) ? (return df) :
    @error "No detections for label = $label at $preds_path"
    end
    # Load and group data frame by file
    gdf =
    #! format: off
    DataFrames.DataFrame(CSV.File(preds_path)) |>
    x -> assert_not_empty(x, preds_path) |>
    x -> assert_detections_present_(x, label, preds_path) |>
    x -> filter_positives!(x, label) |>
    group_by_file!
    #! format: on
    # Make clip and spectrogram
    for (k, v) in pairs(gdf)
    #file_name = chop(v.file[1], head = 2, tail = 4)
    file_name, extension = path_to_file_string(v.file[1])
    #@info (file_name, extension)
    start_times = v.start_time |> x ->
    convert(Vector{Float64}, x) |>
    #dropmissing(x, disallowmissing = true) |> ######CHECK used to make cobb work. not working anymore, but convert works fine. This happens because the col type of dataframe is Float64? even though no missings, seems to ony happen with doc recorders
    sort
    detections = cluster_detections(start_times)
    isempty(detections) && continue
    signal, freq = WAV.wavread("$pth$(file_name).$(extension)")
    if size(signal, 2) == 2
    signal = (signal[:, 1] + signal[:, 2]) / 2
    end
    if freq > 8000
    signal, freq = resample_to_8000hz(signal, freq)
    end
    freq = freq |> Float32
    length_signal = length(signal)
    for detection in detections
    st, en = calculate_clip_start_end(detection, freq, length_signal)
    if unique_file_names == true
    name = "$file_name-$(Int(floor(st/freq)))-$(Int(ceil(en/freq)))" #leave off path, not necesaray if unique file names
    else
    p = replace(pth, "/" => "--") #replace / with -- including trailing /
    name = "$p$file_name-$(Int(floor(st/freq)))-$(Int(ceil(en/freq)))"
    end
    f = "Clips_$(model_name)_$(today())"