added parse

[?]
AEj8dahVWy718uSSFPe9VSRJ5qX5G8pC2zvFzJJ8yzBd
Jan 13, 2025, 7:24 PM
6XJX6ED3CDARYVCX5XCZMRE4UB3PHD2JAWB67DZQFEXV336VERZQC

Dependencies

  • [2] 5M7JW5OV new function in Labels.jl
  • [3] QA2TJZRA ripped ML out into the SkraakML repo, compiles quicker now
  • [4] 2UBDFCJH new files tracked
  • [5] E3Y55MPR added perceptual colour maps and changed get_image_from_sample
  • [6] 4BTZNCRM catch up commit before starting work
  • [*] NV7FXZ5Q first commit
  • [*] YODTMMPT added a whole bunch of functions to Labels.jl for working with raven and avians labels

Change contents

  • edit in src/Skraak.jl at line 11
    [8.345651]
    [8.356019]
    include("Parse.jl")
  • file addition: Parse.jl (----------)
    [8.6598]
    #=
    a = ["201012_123456.wav", "201014_123456.WAV", "201217_123456.wav", "211122_123456.WAV"]
    b = ["121020_123456.WAV", "141020_123456.wav", "171220_123456.WAV", "221121_123456.wav"]
    c = ["20230609_103000.WAV", "20241109_201504.wav"]
    d = [
    "120119_003002.wav",
    "180120_231502.wav",
    "170122_010005.wav",
    "010419_234502.WAV",
    "310320_231502.wav",
    "220824_231502.WAV",
    "240123_231502.wav",
    ]
    e = ["XYZ123_7689_20230609_103000.WAV", "string 20241109_201504.wav"]
    f = [
    "abcdefg__1234_180120_231502.wav",
    "string 120119_003002.wav",
    "ABCD EFG___170122_010005.wav",
    "BHD_1234 010419_234502.WAV",
    "cill xyz 310320_231502.wav",
    "220824_231502.WAV",
    "240123_231502.wav",
    ]
    =#
    using Dates
    # Assumes it is given a folder from one recording session, not random files.
    # For 6-digit dates the year digits need to vary less than the day digits.
    # Does not handle misshapen dates, e.g. filenamesBad = ["20230609_1030qa.WAV", "20241109_20wp04.wav"]
    """
        date_time_of_fname(filenames::Vector{String})

    Extract a `DateTime` from every filename in `filenames`.

    Each filename must contain a stamp made of a 6- or 8-digit date, an underscore and a
    6-digit time (for example `20230609_103000` or `180120_231502`). Any text before or
    after the stamp is ignored. All filenames must use the same date width.

    The date part is decoded by `parse_date_strings` and the time part by
    `parse_time_strings`; see `parse_short_date_strings` for how 6-digit dates are
    interpreted.

    # Returns
    A `Vector{DateTime}` in the same order as `filenames`. An empty input gives an empty
    vector.

    # Throws
    - `ArgumentError` if a filename contains no date_time stamp.
    - `AssertionError` if the filenames mix 6- and 8-digit dates.
    """
    function date_time_of_fname(filenames::Vector{String})
        # Nothing to parse; without this guard the format check below would report a
        # misleading "Different date formats" error for an empty folder.
        isempty(filenames) && return DateTime[]

        pattern = r"(\d{6}|\d{8})_\d{6}"
        stamps = map(filenames) do fname
            m = match(pattern, fname)
            # `match` returns `nothing` when the stamp is absent; name the offending file
            # instead of failing on `nothing.match`.
            m === nothing &&
                throw(ArgumentError("No date_time stamp found in filename: $fname"))
            return m.match
        end

        # date_time format must be the same for the whole vector. Thrown explicitly (same
        # exception type and message as before) so the check cannot be compiled out.
        length(unique(length.(stamps))) == 1 ||
            throw(AssertionError("Different date formats in vector"))
        # Internal invariant: the pattern can only produce 13- or 15-character matches.
        @assert length(stamps[1]) == 15 || length(stamps[1]) == 13 "Wrong length to be a date_time"

        # Split each stamp once into its (date, time) halves.
        parts = split.(stamps, "_")
        d = parse_date_strings(first.(parts))
        t = parse_time_strings(last.(parts))
        return [DateTime(date..., time...) for (date, time) in zip(d, t)]
    end
    """
        parse_time_strings(times)

    Convert 6-character `HHMMSS` strings into `(hour, minute, second)` tuples.

    Each field is read with `tryparse(Int64, ...)`, so a field that is not a valid integer
    comes back as `nothing` rather than raising.

    # Returns
    A vector with one `(h, m, s)` tuple per element of `times`, in order.
    """
    function parse_time_strings(times)
        # Parse one fixed-width field (given as a character range) across all strings.
        field = span -> map(t -> tryparse(Int64, t[span]), times)
        return collect(zip(field(1:2), field(3:4), field(5:6)))
    end
    """
        parse_date_strings(dates)

    Convert date strings into `(year, month, day)` tuples.

    The width of the first element selects the format for the whole collection:
    8 characters are read as `YYYYMMDD`; 6 characters are handed to
    `parse_short_date_strings`. Any other width is an error.

    # Returns
    A vector with one `(y, m, d)` tuple per element of `dates`, in order.

    # Throws
    - `ErrorException` if the first date is neither 8 nor 6 characters long.
    """
    function parse_date_strings(dates)
        width = length(dates[1])
        # Parse one fixed-width field (given as a character range) across all strings.
        field = span -> map(s -> tryparse(Int64, s[span]), dates)
        if width == 8
            years, months, days = field(1:4), field(5:6), field(7:8)
        elseif width == 6
            years, months, days = parse_short_date_strings(dates)
        else
            error("Date is not 8 or 6 digits long")
        end
        return collect(zip(years, months, days))
    end
    # 6 digit dates
    """
        parse_short_date_strings(dates)

    Split 6-digit date strings into year, month and day vectors, working out whether the
    strings are `YYMMDD` or `DDMMYY`.

    Characters 3:4 are always the month. Of the two remaining 2-digit fields, the format
    is taken to be `YYMMDD` only when the trailing field has strictly more distinct values
    across `dates` than the leading field (days vary more than years within a recording
    session). Otherwise, including a tie, `DDMMYY` is assumed. Years are assumed to be in
    the 2000s.

    # Returns
    A tuple `(y, m, d)` of vectors, each with one entry per element of `dates`.

    # Throws
    - `AssertionError` if `dates` has fewer than two elements, because the format cannot
      be inferred from a single date.
    """
    function parse_short_date_strings(dates)
        # Thrown explicitly (same exception type and message as the former `@assert`) so
        # the check cannot be compiled out.
        length(dates) > 1 ||
            throw(AssertionError("Not enough files to work out YYMMDD v DDMMYY"))

        x1 = map(x -> tryparse(Int64, x[1:2]), dates)
        m = map(x -> tryparse(Int64, x[3:4]), dates)
        x2 = map(x -> tryparse(Int64, x[5:6]), dates)

        # Strict comparison: when both fields vary equally the documented default is
        # DDMMYY, e.g. a session spanning New Year ("311219", "010120").
        if length(unique(x2)) > length(unique(x1))
            return x1 .+ 2000, m, x2 # YYMMDD
        end
        return x2 .+ 2000, m, x1 # DDMMYY
    end
  • edit in src/Labels.jl at line 361
    [9.4280]
    [2.626]
    end
    function folder_summary()
    a=glob("*/*.data")
  • replacement in src/Clips.jl at line 229
    [3.5040][3.837:863](),[3.2054][3.837:863]()
    #x -> RGB.(x) |>
    [3.5040]
    [3.5041]
    #x -> RGB.(x) |>
  • file addition: Parse.jl (----------)
    [8.6598]
    #=
    a = ["201012_123456.wav", "201014_123456.WAV", "201217_123456.wav", "211122_123456.WAV"]
    b = ["121020_123456.WAV", "141020_123456.wav", "171220_123456.WAV", "221121_123456.wav"]
    c = ["20230609_103000.WAV", "20241109_201504.wav"]
    d = [
    "120119_003002.wav",
    "180120_231502.wav",
    "170122_010005.wav",
    "010419_234502.WAV",
    "310320_231502.wav",
    "220824_231502.WAV",
    "240123_231502.wav",
    ]
    e = ["XYZ123_7689_20230609_103000.WAV", "string 20241109_201504.wav"]
    f = [
    "abcdefg__1234_180120_231502.wav",
    "string 120119_003002.wav",
    "ABCD EFG___170122_010005.wav",
    "BHD_1234 010419_234502.WAV",
    "cill xyz 310320_231502.wav",
    "220824_231502.WAV",
    "240123_231502.wav",
    ]
    =#
    using Dates
    # Assumes it is given a folder from one recording session, not random files.
    # For 6-digit dates the year digits need to vary less than the day digits.
    # Does not handle misshapen dates, e.g. filenamesBad = ["20230609_1030qa.WAV", "20241109_20wp04.wav"]
    """
        date_time_of_fname(filenames::Vector{String})

    Extract a `DateTime` from every filename in `filenames`.

    Each filename must contain a stamp made of a 6- or 8-digit date, an underscore and a
    6-digit time (for example `20230609_103000` or `180120_231502`). Any text before or
    after the stamp is ignored. All filenames must use the same date width.

    The date part is decoded by `parse_date_strings` and the time part by
    `parse_time_strings`; see `parse_short_date_strings` for how 6-digit dates are
    interpreted.

    # Returns
    A `Vector{DateTime}` in the same order as `filenames`. An empty input gives an empty
    vector.

    # Throws
    - `ArgumentError` if a filename contains no date_time stamp.
    - `AssertionError` if the filenames mix 6- and 8-digit dates.
    """
    function date_time_of_fname(filenames::Vector{String})
        # Nothing to parse; without this guard the format check below would report a
        # misleading "Different date formats" error for an empty folder.
        isempty(filenames) && return DateTime[]

        pattern = r"(\d{6}|\d{8})_\d{6}"
        stamps = map(filenames) do fname
            m = match(pattern, fname)
            # `match` returns `nothing` when the stamp is absent; name the offending file
            # instead of failing on `nothing.match`.
            m === nothing &&
                throw(ArgumentError("No date_time stamp found in filename: $fname"))
            return m.match
        end

        # date_time format must be the same for the whole vector. Thrown explicitly (same
        # exception type and message as before) so the check cannot be compiled out.
        length(unique(length.(stamps))) == 1 ||
            throw(AssertionError("Different date formats in vector"))
        # Internal invariant: the pattern can only produce 13- or 15-character matches.
        @assert length(stamps[1]) == 15 || length(stamps[1]) == 13 "Wrong length to be a date_time"

        # Split each stamp once into its (date, time) halves.
        parts = split.(stamps, "_")
        d = parse_date_strings(first.(parts))
        t = parse_time_strings(last.(parts))
        return [DateTime(date..., time...) for (date, time) in zip(d, t)]
    end
    """
        parse_time_strings(times)

    Convert 6-character `HHMMSS` strings into `(hour, minute, second)` tuples.

    Each field is read with `tryparse(Int64, ...)`, so a field that is not a valid integer
    comes back as `nothing` rather than raising.

    # Returns
    A vector with one `(h, m, s)` tuple per element of `times`, in order.
    """
    function parse_time_strings(times)
        # Parse one fixed-width field (given as a character range) across all strings.
        field = span -> map(t -> tryparse(Int64, t[span]), times)
        return collect(zip(field(1:2), field(3:4), field(5:6)))
    end
    """
        parse_date_strings(dates)

    Convert date strings into `(year, month, day)` tuples.

    The width of the first element selects the format for the whole collection:
    8 characters are read as `YYYYMMDD`; 6 characters are handed to
    `parse_short_date_strings`. Any other width is an error.

    # Returns
    A vector with one `(y, m, d)` tuple per element of `dates`, in order.

    # Throws
    - `ErrorException` if the first date is neither 8 nor 6 characters long.
    """
    function parse_date_strings(dates)
        width = length(dates[1])
        # Parse one fixed-width field (given as a character range) across all strings.
        field = span -> map(s -> tryparse(Int64, s[span]), dates)
        if width == 8
            years, months, days = field(1:4), field(5:6), field(7:8)
        elseif width == 6
            years, months, days = parse_short_date_strings(dates)
        else
            error("Date is not 8 or 6 digits long")
        end
        return collect(zip(years, months, days))
    end
    # 6 digit dates
    """
        parse_short_date_strings(dates)

    Split 6-digit date strings into year, month and day vectors, working out whether the
    strings are `YYMMDD` or `DDMMYY`.

    Characters 3:4 are always the month. Of the two remaining 2-digit fields, the format
    is taken to be `YYMMDD` only when the trailing field has strictly more distinct values
    across `dates` than the leading field (days vary more than years within a recording
    session). Otherwise, including a tie, `DDMMYY` is assumed. Years are assumed to be in
    the 2000s.

    # Returns
    A tuple `(y, m, d)` of vectors, each with one entry per element of `dates`.

    # Throws
    - `AssertionError` if `dates` has fewer than two elements, because the format cannot
      be inferred from a single date.
    """
    function parse_short_date_strings(dates)
        # Thrown explicitly (same exception type and message as the former `@assert`) so
        # the check cannot be compiled out.
        length(dates) > 1 ||
            throw(AssertionError("Not enough files to work out YYMMDD v DDMMYY"))

        x1 = map(x -> tryparse(Int64, x[1:2]), dates)
        m = map(x -> tryparse(Int64, x[3:4]), dates)
        x2 = map(x -> tryparse(Int64, x[5:6]), dates)

        # Strict comparison: when both fields vary equally the documented default is
        # DDMMYY, e.g. a session spanning New Year ("311219", "010120").
        if length(unique(x2)) > length(unique(x1))
            return x1 .+ 2000, m, x2 # YYMMDD
        end
        return x2 .+ 2000, m, x1 # DDMMYY
    end