work on construct primary dataset

quietlight
Mar 5, 2024, 3:01 AM
PQ6OQCBQUJMAGTITCWSBFIKCGW3BO7ZOBQ2AB7DQOIUNZH2KICYAC

Dependencies

  • [2] FY7CEMM2 mac work on train.jl
  • [3] ZGLDIQ4G rolled back reduced and bifurcated training set used to test models before going away
  • [4] BOPNWZL4 refactored Skraak.jl into sub files, tidy now
  • [5] QPBH7QWC added MLBase for confusion matrix, f1, roc, must still remove freq tables
  • [6] G4IN2F2T
  • [7] VLYXSYAD added .ignore
  • [8] E5JDMNIA added kiwi 1-4 model
  • [9] EDYR5C55 fixed ConstructPrimaryDataSet, licence date, Readme, re-arranged Predict.jl
  • [10] E3Y55MPR added perceptual colour maps and changed get_image_from_sample
  • [11] 3FAESP6N catch up with changes, mainly construct primary dataset
  • [12] RBXUHIO2 made some small changes and reverted them mostly
  • [13] NV7FXZ5Q first commit
  • [14] 4BTZNCRM catch up commit before starting work
  • [15] ETOIK7VE recording changes, but this does not work anymore
  • [16] U46LDPL7 added model, CUDA works again now on ubuntu
  • [17] NMQCXLNG catch up commit, about to do some work on prediction loop

Change contents

  • file deletion: .gitignore (----------)
    [3.1][2.36:70](),[2.70][2.1:1]()
    Manifest.toml
    ._*
    .pijul
    .ignore
  • file deletion: .JuliaFormatter.toml (---r------)
    [3.1][2.165:209](),[2.209][2.72:72]()
    remove_extra_newlines = true
    format_docstrings = true
    verbose = true
    format_markdown = true
  • replacement in src/Train.jl at line 28
    [3.336198][3.6:126](),[3.126][3.21:81]()
    glob_pattern = "original_set/*/[N,K]/*.png" #from SSD2/PrimaryDataset 7758648 hard coded a random selection of not, 1:1
    train("K1-9_original_set", 2, glob_pattern, true, 0.90, 64)
    [3.336198]
    [3.0]
    glob_pattern = "original_set*/*/[N,K]/*.png" #from SSD2/PrimaryDataset 7758648 hard coded a random selection of not, 1:1
    train("K1-9_original_set", 20, glob_pattern, true, 0.90, 64)
  • replacement in src/Train.jl at line 52
    [3.336948][2.478:539]()
    images = glob(glob_pattern) |> shuffle! |> x -> x[1:640]
    [3.336948]
    [3.337010]
    images = glob(glob_pattern) #|> shuffle! |> x -> x[1:640]
  • replacement in src/Predict.jl at line 33
    [3.22][3.440:537]()
    model = "/media/david/SSD2/PrimaryDataset/model_K1-8_Colour_CPU_epoch-14-0.9129-2024-02-16.jld2"
    [3.22]
    [3.367291]
    model = "/media/david/SSD2/PrimaryDataset/model_K1-9_original_set_CPU_epoch-7-0.9924-2024-03-05.jld2"
  • replacement in src/Predict.jl at line 81
    [3.1119][3.1119:1176]()
    predict_audio_folder(audio_files, model, folder)
    [3.1119]
    [3.1176]
    length(audio_files) > 0 ? predict_audio_folder(audio_files, model, folder) : @info "No png, flac, wav, WAV files present in $folder"
  • edit in src/ConstructPrimaryDataset.jl at line 2
    [3.72]
    [3.2433]
    #unfinished
  • replacement in src/ConstructPrimaryDataset.jl at line 9
    [3.373][3.373:389]()
    save_pngs()
    [3.373]
    [3.389]
    save_pngs
  • edit in src/ConstructPrimaryDataset.jl at line 55
    [3.376687]
    [3.2952]
    # work needed to make it save in the correct place: currently saves relative to pwd, which is incorrect relative to the previous function
  • replacement in src/ConstructPrimaryDataset.jl at line 62
    [3.6394][3.6394:6411]()
    for f in gdf
    [3.6394]
    [3.6411]
    for f in gdf #where f = the file
  • edit in src/ConstructPrimaryDataset.jl at line 66
    [3.377051][3.6534:6595](),[3.6595][3.378848:378849](),[3.378848][3.378848:378849]()
    kiwi = f.kiwi
    @info (folder, duration, kiwi)
  • replacement in src/ConstructPrimaryDataset.jl at line 67
    [3.6665][3.6665:6748]()
    signal, freq = Skraak.load_audio_file("kiwi_set_2023-11-13/$folder/$file")
    [3.6665]
    [3.6748]
    signal, freq = Skraak.load_audio_file("$folder/$file")
  • replacement in src/ConstructPrimaryDataset.jl at line 69
    [3.6787][3.6787:6827]()
    duration = length_signal / freq
    [3.6787]
    [3.378849]
    duration = length_signal ÷ freq
  • replacement in src/ConstructPrimaryDataset.jl at line 71
    [3.378850][3.6828:6924]()
    mkpath("kiwi_set_2023-11-13/$folder/K")
    mkpath("kiwi_set_2023-11-13/$folder/N")
    [3.378850]
    [3.6924]
    mkpath("$folder/K")
    mkpath("$folder/N")
  • edit in src/ConstructPrimaryDataset.jl at line 75
    [3.6983]
    [3.6983]
    #get a list of start and end times for each clip, clumsy, need to fix this bit
    kiwi = collect(map(collect, zip(f.start_time, f.end_time)))
    @info (folder, duration, kiwi)
  • edit in src/ConstructPrimaryDataset.jl at line 94
    [3.7534][3.7534:7628]()
    #savefig(plot, "kiwi_set-2023-09-07/$folder/K/$folder-$start-$(start+4).png")
  • replacement in src/ConstructPrimaryDataset.jl at line 95
    [3.7659][3.7659:7742]()
    "kiwi_set_2023-11-13/$folder/K/$folder-$start-$(start+4).png",
    [3.7659]
    [3.7742]
    "$folder/K/$folder-$start-$(start+4).png",
  • edit in src/ConstructPrimaryDataset.jl at line 101
    [3.7864][3.7864:7958]()
    #savefig(plot, "kiwi_set-2023-09-07/$folder/N/$folder-$start-$(start+4).png")
  • replacement in src/ConstructPrimaryDataset.jl at line 102
    [3.7989][3.7989:8072]()
    "kiwi_set_2023-11-13/$folder/N/$folder-$start-$(start+4).png",
    [3.7989]
    [3.8072]
    "$folder/N/$folder-$start-$(start+4).png",
  • replacement in src/ConstructPrimaryDataset.jl at line 109
    [3.8203][3.8203:8248]()
    wdf = df[duration-4:duration, :]
    [3.8203]
    [3.8248]
    wdf = ldf[duration-4:duration, :]
  • edit in src/ConstructPrimaryDataset.jl at line 116
    [3.8546][3.8546:8643]()
    #savefig(plot, "kiwi_set-2023-09-07/$folder/$l/$folder-$(duration-4)-$duration.png")
  • replacement in src/ConstructPrimaryDataset.jl at line 117
    [3.8670][3.8670:8756]()
    "kiwi_set_2023-11-13/$folder/$l/$folder-$(duration-4)-$duration.png",
    [3.8670]
    [3.8756]
    "$folder/$l/$folder-$(duration-4)-$duration.png",
  • edit in src/ConstructPrimaryDataset.jl at line 122
    [3.8812]
    [3.378850]
    end
    function calculate_clip(st::Int, en::Int, freq::Int32, len::Int)
    s=(st*freq)-freq+1
    en*freq <= len ? e=en*freq : e=len
    return s, e