new functions in Labels.jl

[?]
AEj8dahVWy718uSSFPe9VSRJ5qX5G8pC2zvFzJJ8yzBd
Nov 17, 2024, 8:27 PM
5M7JW5OVBMQ2EX5QDNJ2FRQPB3DIRG7FDW7DMXXTI2NDRTTSEJZAC

Dependencies

  • [2] 7KO4BAOG Tidy up
  • [3] ACUJ2OKI label summary now works on cwd unless folder specified, note trailing / required if folder is specified
  • [4] 2UBDFCJH new files tracked
  • [5] QA2TJZRA ripped ML out into the SkraakML repo, compiles quicker now
  • [6] 2O6SHIVY added avianz_of_raven function to Labels.jl
  • [7] YXAKJSDT added check_change_avianz_species! function to Labels.jl
  • [8] YODTMMPT added a whole bunch of functions to Labels.jl for working with raven and avians labels
  • [9] Y2LW6LAA deduplicated an export in Labels.jl

Change contents

  • replacement in src/Labels.jl at line 7
    [2.1095][2.1095:1116]()
    avianz_of_raven,
    [2.1095]
    [2.1116]
    avianz_of_raven, #not working right, 1 file per label I think
  • edit in src/Labels.jl at line 12
    [2.1217]
    [2.1217]
    one_hot_labels,
  • replacement in src/Labels.jl at line 179
    [4.14][4.14:53]()
    # Raven selections.txt to AviaNZ .data
    [4.14]
    [4.53]
    # ASSUMES 60 second files, also wav and txt file in same folder
    # Raven selections.txt to AviaNZ .data
  • replacement in src/Labels.jl at line 186
    [4.235][4.342:378]()
    function avianz_of_raven(f::String)
    [4.235]
    [4.1813]
    function avianz_of_raven(f::String) #not working right, check
  • edit in src/Labels.jl at line 214
    [4.1888]
    [4.742]
    duration = Float64[],
  • edit in src/Labels.jl at line 221
    [4.893]
    [4.893]
    dur = data[1]["Duration"]
  • edit in src/Labels.jl at line 230
    [4.1151]
    [4.1151]
    duration = dur,
  • replacement in src/Labels.jl at line 306
    [4.2366][3.7:112]()
    # if specified folder must include trailing /
    function label_summary(folder::String = "", avianz = true)
    [4.2366]
    [4.2420]
    # if specified folder must include trailing /, can be "" for current folder
    function label_summary(folder::String, avianz::Bool = true)
  • edit in src/Labels.jl at line 311
    [4.2057]
    [4.2533]
    duration = Float64[],
  • replacement in src/Labels.jl at line 325
    [4.3014][4.3014:3094]()
    select!(df, [:File, :start_time, :end_time, :low_f, :high_f, :Species])
    [4.3014]
    [3.187]
    select!(df, [:File, :duration, :start_time, :end_time, :low_f, :high_f, :Species])
  • edit in src/Labels.jl at line 360
    [4.4280]
    [4.4280]
    end
    # Snap a label end time to a multiple of 5, allowing 0.2 overlap:
    # an end_time more than 0.2 past a multiple of 5 is rounded up to the next
    # multiple of 5, otherwise it is rounded down to the multiple it just passed.
    # Accepts any Real (not only Float64) so integer-valued times work as well.
    function et(end_time::Real)
        return end_time % 5 > 0.2 ? ceil(end_time / 5) * 5 : floor(end_time / 5) * 5
    end
    #allow 0.2 overlap each side
    #round start_time down to nearest 5
    function st(start_time::Float64)
    start_time % 5 < 4.8 ? (f = floor(start_time / 5) * 5) : f = ceil(start_time / 5) * 5
    return f
  • edit in src/Labels.jl at line 374
    [4.4284]
    # Build a one-hot table of species presence per 5 second segment of each file.
    # labels must be a df loaded from label_summary run over avianz data (not raven);
    # the columns read here are :File, :duration, :start_time, :end_time and :Species.
    # Returns one row per whole 5 second segment of every file, with columns file,
    # start_time, end_time and one Bool column per species (false where a species
    # has no label in that segment, or has no label at all in that file).
    # Returns an empty DataFrame when labels has no rows.
    function one_hot_labels(labels::DataFrame)
        segments = DataFrame[]
        for group in groupby(labels, :File)
            dur = first(group.duration)
            # only whole 5 second segments are kept; a trailing partial one is dropped
            nrows = dur ÷ 5
            seil = nrows * 5
            n = Int(nrows)
            df = DataFrame(
                file = fill(first(group.File), n),
                start_time = collect(0:5:seil-1),
                end_time = collect(5:5:seil),
            )
            for row in eachrow(group)
                fst = st(row.start_time)
                @assert fst >= 0
                lst0 = et(row.end_time)
                # end time must not be greater than duration
                lst = lst0 > dur ? lst0 - 5 : lst0
                @assert lst <= dur
                f_idx = Int(fst ÷ 5) + 1
                l_idx = Int(lst ÷ 5)
                col = Symbol(row.Species)
                # A file can hold several labels for the same species: create the
                # column once and mark later labels into it, rather than replacing
                # it (which kept only the last label of each species).
                if !hasproperty(df, col)
                    df[!, col] = fill(false, n)
                end
                # an empty range (f_idx > l_idx) marks nothing
                df[!, col][f_idx:l_idx] .= true
            end
            push!(segments, df)
        end
        # no label rows means no groups, so there is nothing to stack
        isempty(segments) && return DataFrame()
        # Each group is a different file, so the per-file tables only need stacking;
        # cols = :union fills species columns absent from a file with missing,
        # which is then turned into false.
        stacked = reduce(vcat, segments; cols = :union)
        return coalesce.(stacked, false)
    end