initial commit after ripping ML out of Skraak, it does compile on Mac Julia 1.10

[?]
AEj8dahVWy718uSSFPe9VSRJ5qX5G8pC2zvFzJJ8yzBd
Nov 10, 2024, 1:31 AM
FCL6FKHMM6LX7HNI7F3CH4GRUIXCC2APQIZUKTJSLSIS6SRP2SPQC

Dependencies

Change contents

  • file addition: src (d--r------)
    [2.1]
  • file addition: Train.jl (----------)
    [0.1]
    # Train.jl
    export train #beware Flux.train! is not Skraak.train
    import Base: length, getindex
    import MLBase
    using CUDA, Dates, Images, Flux, Glob, JLD2, Noise
    using Random: shuffle!, seed!
    using Metalhead: ResNet
    #=
    function train(
    model_name::String,
    train_epochs::Int64,
    images::Vector{String},
    pretrain::Model=true,
    train_test_split::Float64 = 0.8,
    batch_size::Int64 = 64,
    )
    Note:
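    Don't forget temp env, julia -t 4
    Assumes 224x224 pixel RGB images as png's
    Saves model jld2's under /media/david/SSD2 (path hard-coded in training_loop!); the labels text file is written to the current directory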
    Use like:
    using Skraak, Glob
    images = Glob.glob("kiwi_set*/*/[N,K]/*.png") #11699814-element Vector{String}
    model = "/media/david/SSD2/PrimaryDataset/model_K1-9_original_set_CPU_epoch-7-0.9924-2024-03-05.jld2"
    train("K1-10_total_set_no_augumentation", 2, images, model, 0.97, 64)
    images = Glob.glob("*/[D,F,M,N]/*.png") #from SSD2/Clips
    model = "/media/david/SSD2/PrimaryDataset/model_K1-5_CPU_epoch-6-0.9795-2023-12-16.jld2"
    train("DFMN1-5", 20, images, model)
    =#
    # Global label-name => integer-index table. It is emptied and refilled by
    # register_label_to_index! and read by the getindex methods of the image
    # containers when they look up the label of a file.
    const LABELTOINDEX::Dict{String,Int32} = Dict()
    # Accepted types for the `pretrain` argument of train(): a Bool or a String,
    # matching the two load_model methods defined further down.
    Model = Union{Bool,String}
    """
        train(model_name, train_epochs, images, pretrain=true, train_test_split=0.8, batch_size=64)

    Train a classifier on the png files listed in `images`.

    The label of every image is the name of its parent folder. Labels are turned
    into integer indices with `labels_to_dict`, registered globally, and the
    image list is cut into train / train-sample / test data loaders before
    `training_loop!` runs for `train_epochs` epochs.

    # Arguments
    - `model_name`: name used in the saved model and label file names.
    - `train_epochs`: number of epochs; epochs run over `1:train_epochs`.
    - `images`: paths of the png files to train on; must not be empty.
    - `pretrain`: a `Bool` or a model path, forwarded to `load_model`.
    - `train_test_split`: fraction of the batches used for training.
    - `batch_size`: batch size of the data loaders.
    """
    function train(
        model_name::String,
        train_epochs::Int64,
        images::Vector{String}, #glob_pattern::String = "*/*.png"
        pretrain::Model = true,
        train_test_split::Float64 = 0.8,
        batch_size::Int64 = 64,
    )
        @assert !isempty(images) "No png images found"
        @info "$(length(images)) images in dataset"

        # Build and publish the label => index mapping
        label_to_index = labels_to_dict(images)
        register_label_to_index!(label_to_index)
        @info "Text labels translate to: " label_to_index

        classes = length(label_to_index)
        @assert classes >= 2 "At least 2 label classes are required, for example: kiwi, not_kiwi"
        @info "$classes classes in dataset"
        @info "Device: $device"

        # Only whole batches are used: trim the image count, then split it
        usable = seil(length(images), batch_size)
        split_at = train_test_idx(usable, batch_size, train_test_split)
        train_loader, sample_loader, test_loader =
            process_data(images, split_at, usable, batch_size)
        @info "Made data loaders"

        model = load_model(pretrain, classes)
        @info "Loaded model"

        opt = Flux.setup(Flux.Optimisers.Adam(1e-5), model)
        @info "Setup optimiser"

        epochs = 1:train_epochs
        @info "Training for $epochs epochs: " now()
        training_loop!(
            model,
            opt,
            train_loader,
            sample_loader,
            test_loader,
            epochs,
            model_name,
            classes,
            label_to_index,
        )
        @info "Finished $(last(epochs)) epochs: " now()
    end
    # Wraps a vector in `img`; used for the training set. Its getindex method
    # (defined for Vector{String}) loads the file and applies noise and masking.
    struct ImageContainer{T<:Vector}
    img::T
    end
    # Wraps a vector in `img`; used for the train-sample and test sets. Its
    # getindex method loads the file without any augmentation.
    struct ValidationImageContainer{T<:Vector}
    img::T
    end
    # Either container type; accepted by make_dataloader.
    Container = Union{ImageContainer,ValidationImageContainer}
    # Round `n` down to the nearest whole multiple of `batch_size`.
    function seil(n::Int, batch_size::Int)
        full_batches = div(n, batch_size)
        return full_batches * batch_size
    end
    # Return the index of the last training image: the number of whole batches in
    # `ceiling`, scaled by `train_test_split`, rounded to a whole number of
    # batches and converted back to an image count.
    function train_test_idx(ceiling::Int, batch_size::Int, train_test_split::Float64)::Int
        total_batches = div(ceiling, batch_size)
        train_batches = round(total_batches * train_test_split)
        return convert(Int, train_batches * batch_size)
    end
    # Map every distinct parent-folder name found in `list` to its 1-based rank
    # in alphabetical order. The parent folder is the second-to-last "/"-separated
    # component of each path.
    function labels_to_dict(list::Vector{String})::Dict{String,Int32}
        labels = sort!(unique(String(split(path, "/")[end-1]) for path in list))
        return Dict{String,Int32}(label => rank for (rank, label) in enumerate(labels))
    end
    """
        register_label_to_index!(label_to_index::Dict{String,Int32})

    Replace the content of the global `LABELTOINDEX` with the entries of
    `label_to_index` and return `LABELTOINDEX`.

    Thanks algunion
    https://discourse.julialang.org/t/dataloader-scope-troubles/105207/4
    """
    function register_label_to_index!(label_to_index::Dict{String,Int32})
        empty!(LABELTOINDEX)
        for (label, index) in label_to_index
            LABELTOINDEX[label] = index
        end
        return LABELTOINDEX
    end
    # Chosen once when this file is loaded: `gpu` if CUDA.functional() is true,
    # otherwise `cpu`. Read by train, make_dataloader and load_model.
    # NOTE(review): Predict.jl assigns a global of the same name with the same
    # expression — confirm whether both files are included into one module
    # before making this `const`.
    device = CUDA.functional() ? gpu : cpu
    # Shuffle the file names with a fixed seed and build three data loaders:
    #   train        – augmented loader over images[1:train_test_index]
    #   train_sample – un-augmented loader over the first
    #                  (ceiling - train_test_index) shuffled images, i.e. as many
    #                  images as the test set holds
    #   test         – un-augmented loader over images[train_test_index+1:ceiling]
    # Returns (train, train_sample, test).
    #
    # The shuffle works on a copy, so the caller's vector keeps its order; the
    # permutation is the one `shuffle!` would have produced in place for the same
    # seed.
    function process_data(array_of_file_names, train_test_index, ceiling, batch_size)
        seed!(1234)
        images = shuffle!(copy(array_of_file_names))
        n_test = ceiling - train_test_index

        train = make_dataloader(ImageContainer(images[1:train_test_index]), batch_size)
        train_sample =
            make_dataloader(ValidationImageContainer(images[1:n_test]), batch_size)
        test = make_dataloader(
            ValidationImageContainer(images[train_test_index+1:ceiling]),
            batch_size,
        )
        return train, train_sample, test
    end
    # Number of items in a training container: the length of its `img` vector.
    function length(data::ImageContainer)
        return length(data.img)
    end
    # Number of items in a validation container: the length of its `img` vector.
    function length(data::ValidationImageContainer)
        return length(data.img)
    end
    # Load training item `index`: read the image, add gaussian noise with a random
    # strength in [0, 0.2), grey out random horizontal and vertical bands, and
    # convert to a Float32 array with the channel dimension moved last.
    # Returns (array, integer label); the label is looked up in LABELTOINDEX by
    # the name of the file's parent folder.
    function getindex(data::ImageContainer{Vector{String}}, index::Int)
        path = data.img[index]
        raw = Images.load(path)
        #Images.imresize(raw, 224, 224) and Images.RGB.(raw) are deliberately skipped
        noisy = Noise.add_gauss(raw, (rand() * 0.2))
        masked = apply_mask!(noisy, 3, 3, 12)
        channels = collect(channelview(float32.(masked)))
        img = permutedims(channels, (3, 2, 1))
        folder_label = split(path, "/")[end-1]
        y = LABELTOINDEX[folder_label]
        return img, y
    end
    # Load validation item `index` without augmentation: read the image and
    # convert it to a Float32 array with the channel dimension moved last.
    # Returns (array, integer label); the label is looked up in LABELTOINDEX by
    # the name of the file's parent folder.
    function getindex(data::ValidationImageContainer{Vector{String}}, index::Int)
        path = data.img[index]
        raw = Images.load(path)
        #Images.imresize(raw, 224, 224) and Images.RGB.(raw) are deliberately skipped
        channels = collect(channelview(float32.(raw)))
        img = permutedims(channels, (3, 2, 1))
        folder_label = split(path, "/")[end-1]
        y = LABELTOINDEX[folder_label]
        return img, y
    end
    # assumes 224px square images
    # Overwrite random row bands and then random column bands of `img` with a
    # uniform grey, in place, and return `img`. Band count and width come from
    # get_random_ranges(max_number, min_size, max_size).
    function apply_mask!(
        img::Array{RGB{N0f8},2},
        max_number::Int = 3,
        min_size::Int = 3,
        max_size::Int = 22,
    )
        grey = RGB{N0f8}(0.7, 0.7, 0.7)
        # horizontal
        for rows in get_random_ranges(max_number, min_size, max_size)
            img[rows, :] .= grey
        end
        # vertical
        for cols in get_random_ranges(max_number, min_size, max_size)
            img[:, cols] .= grey
        end
        return img
    end
    # Draw between 0 and `max_number` random index ranges that fit inside
    # 1:image_size (224 by default, matching the 224px square images used here).
    #
    # Each range is `start:start+size` with `size` drawn from `min_size:max_size`,
    # so it covers `size + 1` indices. Candidates that would run past `image_size`
    # are rejected and redrawn.
    #
    # Returns a Vector{UnitRange{Int}}. Throws ArgumentError when a range is
    # requested but `min_size` is too large for any candidate to fit; without that
    # check the rejection loop would never finish.
    function get_random_ranges(
        max_number::Int,
        min_size::Int,
        max_size::Int;
        image_size::Int = 224,
    )
        number = rand(0:max_number)
        ranges = UnitRange{Int}[]
        if number > 0 && min_size >= image_size
            throw(
                ArgumentError(
                    "min_size ($min_size) must be smaller than image_size ($image_size)",
                ),
            )
        end
        while length(ranges) < number
            start = rand(1:image_size)
            size = rand(min_size:max_size)
            # reject candidates that overflow the image edge
            start + size > image_size && continue
            push!(ranges, start:start+size)
        end
        return ranges
    end
    # Wrap `container` in a collating, parallel Flux.DataLoader with the given
    # batch size. When the global `device` is `gpu`, the loader is additionally
    # wrapped in a CuIterator.
    function make_dataloader(container::Container, batch_size::Int)
        loader = Flux.DataLoader(
            container;
            batchsize = batch_size,
            collate = true,
            parallel = true,
        )
        return device == gpu ? CuIterator(loader) : loader
    end
    # see load_model() from predict, and below
    # Build a ResNet18 backbone (ImageNet weights when `pretrain` is true) with a
    # fresh head — adaptive mean pool, flatten, Dense(512 => classes) — and move
    # it to `device`.
    #
    # `ResNet` is called unqualified: this file imports it with
    # `using Metalhead: ResNet`, which does not bring the module name `Metalhead`
    # into scope.
    function load_model(pretrain::Bool, classes::Int64)
        backbone = ResNet(18, pretrain = pretrain).layers[1]
        head = Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => classes))
        model = Flux.Chain(backbone, head) |> device
        return model
    end
    #If model classes == desired classes I don't empty the last layer
    #That means that I can just train from where I left off for new data, DFMN model
    #Could be a gotcha if I want to train a different 4 class model, no need for a switch just yet
    #
    # Rebuild a ResNet18 + head from the "model_state" entry saved in the jld2 at
    # `model_path` and move it to `device`. When the saved class count differs
    # from `classes`, the trained backbone is kept and the head is replaced by an
    # untrained Dense(512 => classes).
    #
    # `ResNet` is called unqualified: this file imports it with
    # `using Metalhead: ResNet`, which does not bring the module name `Metalhead`
    # into scope.
    function load_model(model_path::String, classes::Int64)
        model_state = JLD2.load(model_path, "model_state")
        # presumably the bias of the final Dense layer, whose length is the
        # number of classes the saved model predicts — TODO confirm
        model_classes = length(model_state[1][2][1][3][2])
        backbone = ResNet(18, pretrain = false).layers[1]
        saved_head =
            Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => model_classes))
        m = Flux.Chain(backbone, saved_head)
        Flux.loadmodel!(m, model_state)
        if classes == model_classes
            return m |> device
        end
        # different class count: keep the loaded backbone, start a new head
        new_head = Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => classes))
        model = Flux.Chain(m.layers[1], new_head) |> device
        return model
    end
    # Run model `m` over every (x, y) batch of loader `d` and return
    # (accuracy rounded to 4 digits, confusion matrix over `c` classes).
    function evaluate(m, d, c)
        predicted = Int64[]
        targets = Int64[]
        n_correct = 0
        n_seen = 0
        for (x, y) in d
            p = Flux.onecold(m(x))
            append!(predicted, p)
            append!(targets, y)
            n_seen += length(y)
            n_correct += sum(p .== y)
        end
        accuracy = round(n_correct / n_seen, digits = 4)
        confusion_matrix = MLBase.confusmat(c, targets, predicted)
        #freqtable(DataFrames.DataFrame(targets = actual, predicts = pred), :targets, :predicts)
        #roc=MLBase.roc(actual, pred, 100)
        #f1=MLBase.f1score(roc)
        return accuracy, confusion_matrix #, roc, f1
    end
    # Run one pass of Flux.train! over the `train` loader, using logit
    # cross-entropy against one-hot targets over 1:classes as the loss.
    function train_epoch!(model; opt, train, classes)
        loss(m, x, y) =
            Flux.Losses.logitcrossentropy(m(x), Flux.onehotbatch(y, 1:classes))
        return Flux.train!(loss, model, train, opt)
    end
    # Write one "key => value" line per entry of `dict` to
    # labels_<model_name>-<today's date>.txt in the current directory,
    # overwriting any existing file of that name.
    #
    # Lines are streamed straight to the file rather than accumulated by repeated
    # string concatenation; the bytes written are the same.
    function dict_to_text_file(dict, model_name)
        open("labels_$(model_name)-$(today()).txt", "w") do file
            for (key, value) in dict
                print(file, "$(key) => $(value)\n")
            end
        end
        @info "Saved labels to file for future reference"
    end
    # Evaluate once on `test` as a warm-up, then for every epoch in `epochs`:
    # train one pass, evaluate on `train_sample` and `test`, log accuracy and
    # confusion matrices, and save the CPU copy of the model state as a jld2.
    # The label mapping is written to a text file at epoch 1.
    #
    # Keyword:
    #   save_dir – directory the jld2 files are written to. Defaults to the
    #              location that used to be hard-coded, so existing calls behave
    #              as before; pass e.g. `save_dir = pwd()` on another machine.
    #
    # A model is saved after every epoch; the "only save when better" check is
    # not active.
    function training_loop!(
        model,
        opt,
        train,
        train_sample,
        test,
        epochs::UnitRange{Int64},
        model_name::String,
        classes,
        label_to_index;
        save_dir::AbstractString = "/media/david/SSD2",
    )
        @time warm_up_accuracy, vcm = evaluate(model, test, classes)
        @info "warm up accuracy" accuracy = warm_up_accuracy
        @info "warm up confusion matrix" vcm
        for epoch in epochs
            println("")
            @info "Starting Epoch: $epoch"
            epoch == 1 && dict_to_text_file(label_to_index, model_name)
            @time train_epoch!(model; opt, train, classes)
            @time train_accuracy, train_confusion_matrix =
                evaluate(model, train_sample, classes)
            @info "Epoch: $epoch"
            @info "train" accuracy = train_accuracy
            @info "train" train_confusion_matrix
            @time test_accuracy, test_confusion_matrix = evaluate(model, test, classes)
            @info "test" accuracy = test_accuracy
            @info "test" test_confusion_matrix
            # number kiwi guessed right, assumes kiwi=1, not=2 (alphabetical):
            # test_confusion_matrix[1,1] could be used to keep only the best model
            let _model = cpu(model)
                file_name = "model_$(model_name)_CPU_epoch-$epoch-$test_accuracy-$(today()).jld2"
                jldsave(joinpath(save_dir, file_name); model_state = Flux.state(_model))
                @info "Saved a best_model"
            end
        end
    end
  • file addition: SkraakML.jl (----------)
    [0.1]
    module SkraakML

    # Print a fixed greeting to standard output.
    function greet()
        return print("Hello World!")
    end

    end # module SkraakML
  • file addition: Predict.jl (----------)
    [0.1]
    # Predict.jl
    export predict
    export get_images_from_audio
    using WAV,
    DSP, Images, ThreadsX, Dates, DataFrames, CSV, Flux, CUDA, Metalhead, JLD2, FLAC, Glob,
    PerceptualColourMaps
    import Base: length, getindex
    ##Dependency, duplicated from Utility
    # Resample `signal` along its first dimension by the ratio 16000 / freq and
    # return (resampled signal, 16000).
    function _resample_to_16000hz(signal, freq)
        ratio = 16000.0f0 / freq
        resampled = DSP.resample(signal, ratio; dims = 1)
        return resampled, 16000
    end
    ##Dependency, duplicated from Clips
    # Turn one audio sample into a 224x224 Float32 colour image of its
    # spectrogram: power -> dB -> shifted and scaled by the maximum -> flipped
    # vertically -> "L4" colour map -> resized.
    #
    # Zero power values are replaced by the smallest positive power before the dB
    # conversion. Two degenerate inputs are guarded:
    #   * an all-zero spectrogram has no positive value to borrow, so the smallest
    #     positive float is used instead of indexing past the end of the
    #     unique-value list;
    #   * a flat spectrogram shifts to all zeros, so the division by the maximum
    #     is skipped rather than producing 0/0.
    function _get_image_from_sample(sample, f) #sample::Vector{Float64}
        S = DSP.spectrogram(sample, 400, 2; fs = convert(Int, f))
        i = S.power
        if minimum(i) == 0.0
            positive = filter(>(0), vec(i))
            floor_power = isempty(positive) ? floatmin(eltype(i)) : minimum(positive)
            replace!(i, 0.0 => floor_power)
        end
        db = DSP.pow2db.(i)
        shifted = db .+ abs(minimum(db))
        peak = maximum(shifted)
        scaled = peak > 0 ? shifted ./ peak : shifted
        flipped = reverse(scaled, dims = 1)
        coloured = PerceptualColourMaps.applycolourmap(flipped, cmap("L4"))
        #RGB.(coloured) is not applied; the numeric colour array is resized as is
        resized = imresize(coloured, 224, 224)
        image = Float32.(resized)
        return image
    end
    """
        predict(glob_pattern::String, model::String)
        predict(folders::Vector{String}, model::String)

    Run a saved model over every folder matched by `glob_pattern` (or listed in
    `folders`) and save the predictions as a csv inside each folder, similar to
    opensoundscape.

    Args:
    • glob pattern (folder/) or a vector of folders
    • model path

    Returns: Nothing - This function saves csv files.

    I use this function to find kiwi from new data gathered on a trip. And to
    predict D/F/M/N for images clipped from primary detections.
    It works on both audio (wav or flac) and png images.

    Note:
    From Pomona-3/Pomona-3/
    julia -t 4
    Don't forget temp environment: ] activate --temp

    Use like:
    using Skraak
    glob_pattern = "*/*/"
    model = "/media/david/SSD2/PrimaryDataset/model_K1-9_original_set_CPU_epoch-7-0.9924-2024-03-05.jld2"
    glob_pattern = "Clips_2024-10-21/"
    model = "/media/david/SSD1/Clips/model_DFMN1-5_CPU_epoch-18-0.9132-2024-01-29.jld2"
    predict(glob_pattern, model)
    """
    function predict(glob_pattern::String, model::String)
        net = load_model_pred(model) |> device
        folders = Glob.glob(glob_pattern)
        @info "Folders: $folders"
        foreach(folders) do folder
            @info "Working on: $folder"
            predict_folder(folder, net)
        end
    end
    # Same as the glob-pattern method, but for an explicit list of folders: load
    # the model once, move it to `device`, then predict folder by folder.
    function predict(folders::Vector{String}, model::String)
        net = load_model_pred(model) |> device
        @info "Folders: $folders"
        foreach(folders) do folder
            @info "Working on: $folder"
            predict_folder(folder, net)
        end
    end
    #~~~~~ The guts ~~~~~#
    # see load_model() from train, different input types
    # Rebuild a ResNet18 backbone plus pool/flatten/Dense head sized from the
    # "model_state" entry saved in the jld2 at `model_path`, load that state into
    # it and return the model (not yet moved to any device).
    function load_model_pred(model_path::String)
        model_state = JLD2.load(model_path, "model_state")
        # presumably the bias of the final Dense layer, whose length is the
        # number of classes — TODO confirm
        model_classes = length(model_state[1][2][1][3][2])
        @info "Model classes: $model_classes"
        backbone = Metalhead.ResNet(18, pretrain = false).layers[1]
        head =
            Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => model_classes))
        model = Flux.Chain(backbone, head)
        Flux.loadmodel!(model, model_state)
        return model
    end
    #=
    function load_bson(model_path::String)
    BSON.@load model_path model
    end
    =#
    # Predict on one folder. Png files take priority: when any are present only
    # they are used. Otherwise all wav (any letter case) and flac files are
    # predicted; with neither present a message is logged.
    function predict_folder(folder::String, model)
        wav = Glob.glob("$folder/*.[W,w][A,a][V,v]")
        flac = Glob.glob("$folder/*.flac")
        audio_files = vcat(wav, flac) #if wav and flac both present will predict on all
        png_files = Glob.glob("$folder/*.png")
        #it will predict on images when both images and audio present
        if !isempty(png_files)
            predict_image_folder(png_files, model, folder)
        elseif length(audio_files) > 0
            predict_audio_folder(audio_files, model, folder)
        else
            @info "No png, flac, wav, WAV files present in $folder"
        end
    end
    # Chosen once when this file is loaded: `gpu` if CUDA.functional() is true,
    # otherwise `cpu`. Read by predict, png_loader and audio_loader.
    # NOTE(review): Train.jl assigns a global of the same name with the same
    # expression — confirm whether both files are included into one module
    # before making this `const`.
    device = CUDA.functional() ? gpu : cpu
    # Predict from png images
    # Wraps a vector in `img`; png_loader fills it with png paths. Its getindex
    # method (defined for Vector{String}) returns the loaded image together with
    # its path instead of a label.
    struct PredictImageContainer{T<:Vector}
    img::T
    end
    # Number of items: the length of the wrapped `img` vector.
    length(data::PredictImageContainer) = length(data.img)
    # Load item `idx` for prediction: read the image, resize it to 224x224,
    # convert to RGB and then to a Float32 array with the channel dimension moved
    # last. Returns (array, path).
    function getindex(data::PredictImageContainer{Vector{String}}, idx::Int)
        path = data.img[idx]
        loaded = Images.load(path)
        resized = Images.imresize(loaded, 224, 224)
        rgb = Images.RGB.(resized)
        channels = collect(channelview(float32.(rgb)))
        img = permutedims(channels, (3, 2, 1))
        return img, path
    end
    # Predict a label for every png in `png_files` and write a two-column csv
    # (file name without directories, label) to <folder>/preds-<today>.csv.
    function predict_image_folder(png_files::Vector{String}, model, folder::String)
        n_files = length(png_files)
        @assert (n_files > 0) "No png files present in $folder"
        @info "$(n_files) png_files in $folder"
        save_path = "$folder/preds-$(today()).csv"
        loader = png_loader(png_files)
        @time preds, files = predict_pngs(model, loader)
        # keep only the last "/"-separated component of each path
        names = map(p -> last(split(p, "/")), files)
        table = DataFrames.DataFrame(file = names, label = preds)
        CSV.write(save_path, table)
    end
    # Build a collating, parallel Flux.DataLoader (batch size 64) over the given
    # png paths. When the global `device` is `gpu`, the loader is additionally
    # wrapped in a CuIterator.
    function png_loader(png_files::Vector{String})
        container = PredictImageContainer(png_files)
        loader =
            Flux.DataLoader(container; batchsize = 64, collate = true, parallel = true)
        return device == gpu ? CuIterator(loader) : loader
    end
    # Run model `m` over every (images, paths) batch of loader `d`.
    # Returns (predicted class indices, paths) in loader order.
    #
    # The accumulators are typed, as in `evaluate` in Train.jl, instead of the
    # untyped `[]`, so the label column downstream is an integer column.
    function predict_pngs(m, d)
        @info "Predicting..."
        pred = Int[]
        path = String[]
        for (x, pth) in d
            p = Flux.onecold(m(x))
            append!(pred, p)
            append!(path, pth)
        end
        return pred, path
    end
    # Predict from audio files
    # Write <folder>/preds-<today>.csv: first an empty table carrying the column
    # header (file, start_time, end_time, label), then the predictions of each
    # audio file appended in turn.
    function predict_audio_folder(audio_files::Vector{String}, model, folder::String)
        n_files = length(audio_files)
        @assert (n_files > 0) "No wav or flac audio files present in $folder"
        @info "$(n_files) audio_files in $folder"
        save_path = "$folder/preds-$(today()).csv"
        header = DataFrames.DataFrame(
            file = String[],
            start_time = Float64[],
            end_time = Float64[],
            label = Int[],
        )
        CSV.write(save_path, header)
        for file in audio_files
            rows = predict_audio_file(file, model)
            CSV.write(save_path, rows, append = true)
        end
    end
    # Predict a label for every clip of one audio file and return a sorted
    # DataFrame with columns file, start_time, end_time, label.
    #
    # The accumulators are typed instead of the untyped `[]`: labels are class
    # indices, and audio_loader builds each clip time as a (start, end) pair of
    # Float64 seconds. The local formerly called `time` is renamed so it no
    # longer shadows Base.time.
    function predict_audio_file(file::String, model)
        #check form of opensoundscape preds.csv and needed by my make_clips
        @info "File: $file"
        @time data = audio_loader(file)
        pred = Int[]
        clip_times = Tuple{Float64,Float64}[]
        @time for (x, t) in data
            p = Flux.onecold(model(x))
            append!(pred, p)
            append!(clip_times, t)
        end
        df = DataFrames.DataFrame(
            :file => fill(file, length(clip_times)),
            :start_time => first.(clip_times),
            :end_time => last.(clip_times),
            :label => pred,
        )
        sort!(df)
        return df
    end
    # Build a single-batch Flux.DataLoader over all clip images of `file`, paired
    # with each clip's (start, end) time in seconds. Times advance by
    # increment / divisor seconds per clip. When the global `device` is `gpu`,
    # the loader is additionally wrapped in a CuIterator.
    function audio_loader(file::String, increment::Int = 5, divisor::Int = 2)
        raw_images, n_samples = get_images_from_audio(file::String, increment, divisor)
        images = reshape_images(raw_images, n_samples)
        # Start time and end time for each 5s audio clip, in seconds relative to the start of the file.
        hop_seconds = increment / divisor
        start_time = 0:hop_seconds:(n_samples-1)*hop_seconds
        end_time = increment:hop_seconds:(n_samples+1)*hop_seconds
        time = collect(zip(start_time, end_time))
        loader = Flux.DataLoader((images, time), batchsize = n_samples, shuffle = false)
        #check this works with gpu
        return device == gpu ? CuIterator(loader) : loader
    end
    # Stack `n_samples` images of 224*224*3 values each into one
    # (224, 224, 3, n_samples) array, one image per slice of the last dimension.
    #
    # Every image is flattened to a column before concatenating. Concatenating
    # the 3-d images directly with hcat joins them along their second dimension,
    # and the following reshape then interleaves channels and samples whenever
    # there is more than one image.
    function reshape_images(raw_images, n_samples)
        columns = isempty(raw_images) ? hcat() : reduce(hcat, map(vec, raw_images))
        images = reshape(columns, (224, 224, 3, n_samples))
        return images
    end
    #= not needed
    function get_image_for_inference(sample, f)
    image =
    #! format: off
    _get_image_from_sample(sample, f) |>
    # x -> collect(channelview(float32.(x))) |>
    x -> permutedims(x, (3, 2, 1))
    #! format: on
    return image
    end
    =#
    # need to change divisor to an overlap fraction, check interaction with audio_loader()
    # if divisor is 0, then no overlap
    #
    # Load `file`, resample to 16000 Hz when its rate is higher, cut the first
    # channel into clips of `increment` seconds and turn each clip into a
    # spectrogram image. Returns (images, number of images).
    #
    # Consecutive clips start `increment / divisor` seconds apart, which is the
    # step audio_loader uses for its start and end times. The overlap handed to
    # DSP.arraysplit is therefore the clip length minus that hop; a fixed overlap
    # of 0 would leave the clips a whole `increment` apart and out of step with
    # those times.
    function get_images_from_audio(file::String, increment::Int = 5, divisor::Int = 2) #5s sample, 2.5s hop
        signal, freq = load_audio_file(file)
        if freq > 16000
            signal, freq = _resample_to_16000hz(signal, freq)
        end
        f = convert(Int, freq)
        inc = increment * f
        hop = divisor == 0 ? inc : inc ÷ divisor
        noverlap = inc - hop
        # NOTE(review): arraysplit appears to hand back one reused internal
        # buffer per index, so each clip is copied before the threaded map
        # below reads it — confirm against the DSP.jl source.
        split_signal = [copy(clip) for clip in DSP.arraysplit(signal[:, 1], inc, noverlap)]
        raw_images = ThreadsX.map(x -> _get_image_from_sample(x, f), split_signal)
        n_samples = length(raw_images)
        return raw_images, n_samples
    end
    # Read a wav or flac file, chosen by the text after the last "." in `file`,
    # and return (signal, sample rate). Asserts that the extension is supported
    # and that the first channel is not empty.
    function load_audio_file(file::String)
        ext = last(split(file, "."))
        @assert ext in ["WAV", "wav", "flac"] "Unsupported audio file type, requires wav or flac."
        signal, freq = if ext == "flac"
            load(file)
        else
            WAV.wavread(file)
        end
        @assert !isempty(signal[:, 1]) "$file seems to be empty, could it be corrupted?\nYou could delete it, or replace it with a known\ngood version from SD card or backup."
        return signal, freq
    end
    ############### PYTHON Opensoundscape ################
    #=
    # Python 3.8.12, opensoundscape 0.7.1
    # Dont forget conda activate opensoundscape
    # Dont forget to modify file names and glob pattern
    # Run script in Pomona-2, hard code trip date in the glob
    # python /media/david/USB/Skraak/src/predict.py
    from opensoundscape.torch.models.cnn import load_model
    import opensoundscape
    import torch
    from pathlib import Path
    import numpy as np
    import pandas as pd
    from glob import glob
    import os
    from datetime import datetime
    model = load_model('/home/david/best.model0')
    # folders = glob('./*/2023-?????/')
    # folders = glob('./*/*/2024-05-0?')
    folders = glob('./*/2024-10-18/')
    for folder in folders:
    os.chdir(folder)
    print(folder, ' start: ', datetime.now())
    # Beware, secretary island files are .wav
    field_recordings = glob('./*.[W,w][A,a][V,v]')
    scores, preds, unsafe = model.predict(
    field_recordings,
    binary_preds = 'single_target',
    overlap_fraction = 0.5,
    batch_size = 128,
    num_workers = 12)
    scores.to_csv("scores-2024-10-21.csv")
    preds.to_csv("preds-2024-10-21.csv")
    os.chdir('../..') # Be careful this matches the glob on line 284
    print(folder, ' done: ', datetime.now())
    print()
    print()
    =#
    #=Kahurangi
    folders = glob('./*/')
    for folder in folders:
    os.chdir(folder)
    print(folder, ' start: ', datetime.now())
    # Beware, secretary island files are .wav
    field_recordings = glob('./*.[W,w][A,a][V,v]')
    scores, preds, unsafe = model.predict(
    field_recordings,
    binary_preds = 'single_target',
    overlap_fraction = 0.5,
    batch_size = 128,
    num_workers = 12)
    scores.to_csv("scores-2024-10-21.csv")
    preds.to_csv("preds-2024-10-21.csv")
    os.chdir('./..') # Be careful this matches the glob on line 284
    print(folder, ' done: ', datetime.now())
    print()
    print()
    =#
  • file addition: README.md (----------)
    [2.1]
    # Skraak
    Identify bird calls using AI, and monitor call frequency.
    __Skraak is intended to be simple to use for simple people like me.__
    This package serves [skraak.kiwi](https://skraak.kiwi).
    Most of the skraak.kiwi data has been recorded using Open Acoustics AudioMoths or μMoths at 16000 Hz. DOC recorders at 8000 Hz work fine.
    It is a good idea to use an Nvidia GPU. Everything should work fine on CPU, just slow.
    AMD and Apple Silicon GPUs are not supported but should be easy for you to get working with julia AMD or Metal packages.
    If you are doing serious work, start the julia repl with: julia -t n where n is up to 1/2 the number of cores you have. I do 4, this is enough to keep up with a gamer style GPU.
    __You can use Skraak too.__
    ```
    [Install Julia](https://julialang.org/downloads/platform/), Julia-1.10 or newer
    [git clone the Skraak project](https://github.com/quietlight/Skraak), if you don't have git or the git cli, you can download a zip file by clicking the <code> button.
    cd to your Skraak folder
    start the julia repl with $julia
    (You will want to install Revise and OhMyREPL, just do 'using Revise, OhMyREPL' in the Julia repl, add 'using Revise, OhMyREPL' to ~/.julia/config/startup.jl)
    type: ] (to enter Pkg mode)
    type: activate .
    type: instantiate
    backspace to exit Pkg mode
    exit repl with ctrl-D.
    ```
    Later:
    ```
    start the julia repl with $julia
    type: ] (to enter Pkg mode)
    type: dev path/to/Skraak (to make it a local package)
    backspace to exit Pkg mode
    type: using Skraak, Glob (glob is only here to help you refine your glob patterns)
    WORK...
    When finished working you can if you like do 'free Skraak' in Pkg mode (accessed with ']')
    ```
    1. Take some WAV's organised into a file structure LOCATION/TRIP_DATE/WAV_FILES
    2. and labels saved in a csv in the form:
    * file(String),start_time,end_time,label(Integer) (where start_time and end_time are in seconds from the start of the wav file)
    * at least 2 label classes are required, for example Kiwi, Not
    3. Generate a primary dataset of spectrogram images with the following file structure:
    * DATASET/AUDIO_FILE*/LABEL*/PNG's (png files must be 224X224 px square RGB).
    * This structure is required, when training, __the parent folder of a file is the label__.
    * This function creates a folder for each file, creates subfolders for each label, then saves png files in the appropriate label sub folder.
    * Space is needed. It uses the whole audio file. (I aim for 96% Not, 4% Kiwi)
    * and saves a flac copy for reference
    > I use labels, [K, N] in words [Kiwi, Not]. Anything will work, the unique text labels are sorted alphabetically and mapped to integer labels in the training process.
    > More than 2 label classes is fine, but keep it simple until you have a lot of data.
    > It is better __not__ to have everything in big folders, 100_000 files in a folder on a Fat32 removable drive will rapidly grind to a stand still.
    > You could have many thousands of K and N folders, for example, the model does not care.
    > Native file systems on mac/linux will work ok. I use ext4 (linux) file systems on external SSD's for both linux and mac.
    ```
    ```
    4. Train a Resnet18 model, either pretrained on Imagenet, or preferably the pretrained Skraak Kiwi model, which is currently trained on 7_700_000 images.
    Skraak trains on 5 second clips, converted to 224x224 pixel RGB spectrogram images.
    ```
    using Skraak, Glob
    glob_pattern_1 = "Clips*/[D,F,M,N]/*.png" #for example. Note: requires png's as input.
    glob_pattern_2 = "Dataset*/[K, N]/*.png"
    # train takes a vector of png file paths, so expand the glob pattern first.
    # Train a model named Test1 for 2 epochs on png files found by glob_pattern_1,
    # start with a pretrained model.
    train("Test1", 2, Glob.glob(glob_pattern_1), true)
    # Train a model named Test2 for 2 epochs on png files found by glob_pattern_2,
    # train using model found at "path/to/model.jld2"
    train("Test2", 2, Glob.glob(glob_pattern_2), "path/to/model.jld2")
    # Note: Your unique text labels are sorted alphabetically, and converted to
    # integers, [1,2,3...] to be consumed by the flux model
    # A text file will be saved beside the model.jld2, with the label to
    # integer mapping.
    ```
    5. Run inference on raw data using a trained model
    Skraak will try to find png images first, in the folders covered by the glob pattern. If there are no png's found it will predict on wav or flac files, using 5 second audio clips, converted to 224x224 pixel RGB spectrogram images, with a 2.5 second hop.
    > You are responsible for providing an appropriate model.
    > I use a binary Kiwi/Not model for finding calls in audio data, and a Duet/Female/Male/Not model on png clips made from calls detected by the binary model.
    > Find some models to start with in the Models folder
    ```
    using Skraak
    glob_pattern = "*/*/" #Note: requires folders as input. Folders contain flac, wav or png files.
    # Predict label classes of png, wav or flac files found in folders specified by
    # glob_pattern using model.jld2. A preds-<date>.csv file is saved in each folder
    predict(glob_pattern, "path/to/model.jld2")
    ```
    6. Generate audio clips and spectrogram images of all calls found.
    ```
    # Make clips from a preds.csv file of the form:
    # file(String),start_time,end_time,label(Int)
    # 1 is the label, it can be any int present in the label field of preds.csv
    # It saves clips in a folder 'Clips_2023-11-09'
    make_clips("preds.csv", 1)
    ```
    7. Sort calls into subclasses (say: Duet, Female, Male, Nothing) manually, or using a model combined with human supervision. TODO
    8. Store data from calls and file metadata in a DuckDB database for statistical analysis using SQL, DataFrames, Plots.
    ```
    I will not document this until the DuckDB storage api has stabilised.
    For now always store a csv backup using "EXPORT DATABASE 'Backup_2023-10-10';" in the duckdb cli.
    I highly recommend storing data in a duckdb database.
    Querying a duckdb database with SQL is faster than even julia DataFrames, both leave Pandas in the dust.
    ```
    9. Repeat, iterating on your models as you accumulate more data. It's hard until it gets easy.
    Managing datasets is like gardening, it takes some weeding and a _lot_ of compost (aka data) to get a good model growing.
    Julia is great for machine learning because it is relatively simple to get a GPU working. It does have disadvantages at GPT-4 scale, but for this kind of work it is excellent. Julia shines with any scientific computing task.
  • file addition: Project.toml (----------)
    [2.1]
    name = "SkraakML"
    uuid = "960381bc-3737-4297-a0a0-71f7f33f3c12"
    authors = ["David Cary <cdecary@gmail.com>"]
    version = "0.1.0"
    [deps]
    CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
    CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
    DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
    DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
    Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
    FLAC = "abae9e3b-a9a0-4778-b5c6-ca109b507d99"
    Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
    Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
    ImageTransformations = "02fcd773-0e25-5acc-982a-7f6622650795"
    JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
    MLBase = "f0e99cf1-93fa-52ec-9ecc-5026115318e0"
    Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
    Noise = "81d43f40-5267-43b7-ae1c-8b967f377efa"
    PerceptualColourMaps = "54e51dfa-9dd7-5231-aa84-a4037b83483a"
    Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
    ThreadsX = "ac1d9e8a-700a-412c-b207-f0111f4b6c0d"
    WAV = "8149f6b0-98f6-5db9-b78f-408fbbb8ef88"
    cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
  • file addition: LICENSE (---r------)
    [2.1]
    MIT License
    Copyright (c) 2023 David Cary <cdecary@gmail.com> and contributors
    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:
    The above copyright notice and this permission notice shall be included in all
    copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    SOFTWARE.