catch up commit, train now working with julia 1.10.5 but not 1.11

quietlight
Oct 25, 2024, 3:33 AM
X54TLSYE7DYWGLQUI7AIZOABUCQ7WTHFC6G62UBA7RJZK4AYOJGAC

Dependencies

  • [2] JYCKLP2E changes to clips and predict
  • [3] E3Y55MPR added perceptual colour maps and changed get_image_from_sample
  • [4] FY7CEMM2 mac work on train.jl
  • [5] QPBH7QWC added MLBase for confusion matrix, f1, roc, must still remove freq tables
  • [6] 27HKDBYT changed train api to take a list of images instead of glob pattern
  • [7] NV7FXZ5Q first commit
  • [8] RBXUHIO2 made some small changes and reverted them mostly
  • [9] ETOIK7VE recording changes, but this does not work anymore
  • [10] U46LDPL7 added model, CUDA works again now on ubuntu
  • [11] ZGLDIQ4G rolled back reduced and bifurcated training set used to test models before going away
  • [12] MMG2PLXK new function to make clips of downloaded bird calls
  • [13] 2UBDFCJH new files tracked
  • [14] NMQCXLNG catch up commit, about to do some work on prediction loop
  • [15] PQ6OQCBQ work on construct primary dataset
  • [*] ROFI4OLA catch up, working on colour images

Change contents

  • replacement in src/Train.jl at line 82
    [3.338053][3.146:169]()
    label_to_index
    [3.338053]
    [3.338077]
    label_to_index,
  • replacement in src/Train.jl at line 267
    [3.343556][3.343556:343579](),[3.343579][3.288:416]()
    confusion_matrix =
    MLBase.confusmat(c, actual, pred)
    #freqtable(DataFrame(targets = actual, predicts = pred), :targets, :predicts)
    [3.343556]
    [3.540]
    confusion_matrix = MLBase.confusmat(c, actual, pred)
    #freqtable(DataFrame(targets = actual, predicts = pred), :targets, :predicts)
  • replacement in src/Train.jl at line 312
    [3.344691][3.722:816]()
    @time train_accuracy, train_confusion_matrix = evaluate(model, train_sample, classes)
    [3.344691]
    [3.344774]
    @time train_accuracy, train_confusion_matrix =
    evaluate(model, train_sample, classes)
  • replacement in src/Train.jl at line 324
    [3.558][3.558:602](),[3.602][3.345118:345179](),[3.1138][3.345118:345179](),[3.1162][3.345118:345179](),[3.345118][3.345118:345179](),[3.345179][3.1163:1254](),[3.1254][3.345268:345399](),[3.345268][3.345268:345399]()
    #a = test_confusion_matrix[1,1]
    let _model = cpu(model)
    jldsave(
    "model_$(model_name)_CPU_epoch-$epoch-$test_accuracy-$(today()).jld2";
    model_state = Flux.state(_model),
    )
    @info "Saved a best_model"
    end
    [3.558]
    [3.603]
    #a = test_confusion_matrix[1,1]
    let _model = cpu(model)
    jldsave(
    "/media/david/SSD2/model_$(model_name)_CPU_epoch-$epoch-$test_accuracy-$(today()).jld2";
    model_state = Flux.state(_model),
    )
    @info "Saved a best_model"
    end
  • replacement in src/Predict.jl at line 35
    [3.356][3.38:73]()
    glob_pattern = "Clips_2024-06-26/"
    [3.356]
    [3.73]
    glob_pattern = "Clips_2024-10-21/"
  • edit in src/Predict.jl at line 260
    [3.373932]
    [3.373932]
    """
        resample_to_8000hz(signal, freq)

    Resample `signal`, recorded at sampling rate `freq`, to 8000 Hz using
    `DSP.resample` along the first dimension.

    Returns the tuple `(resampled_signal, 8000)`.
    """
    function resample_to_8000hz(signal, freq)
        # Ratio of the target rate to the source rate, as expected by DSP.resample.
        ratio = 8000.0f0 / freq
        resampled = DSP.resample(signal, ratio; dims = 1)
        return resampled, 8000
    end
  • replacement in src/Predict.jl at line 286
    [3.374412][3.374412:374457]()
    model = load_model('/home/david/best.model')
    [3.374412]
    [3.374457]
    model = load_model('/home/david/best.model0')
  • replacement in src/Predict.jl at line 290
    [3.473][3.374495:374521](),[3.374495][3.374495:374521]()
    folders = glob('./*/*/')
    [3.473]
    [3.374521]
    folders = glob('./*/2024-10-18/')
  • replacement in src/Predict.jl at line 302
    [3.374910][2.47:131]()
    scores.to_csv("scores-2024-08-29.csv")
    preds.to_csv("preds-2024-08-29.csv")
    [3.374910]
    [2.131]
    scores.to_csv("scores-2024-10-21.csv")
    preds.to_csv("preds-2024-10-21.csv")
  • edit in src/Predict.jl at line 309
    [3.590]
    #=Kahurangi
    folders = glob('./*/')
    for folder in folders:
    os.chdir(folder)
    print(folder, ' start: ', datetime.now())
    # Beware, secretary island files are .wav
    field_recordings = glob('./*.[W,w][A,a][V,v]')
    scores, preds, unsafe = model.predict(
    field_recordings,
    binary_preds = 'single_target',
    overlap_fraction = 0.5,
    batch_size = 128,
    num_workers = 12)
    scores.to_csv("scores-2024-10-21.csv")
    preds.to_csv("preds-2024-10-21.csv")
    os.chdir('./..') # Be careful this matches the glob on line 284
    print(folder, ' done: ', datetime.now())
    print()
    print()
    =#
  • edit in src/FileMetaData.jl at line 9
    [3.4707]
    [3.4707]
    # needs SSD1 present for dawn_dusk.csv
  • replacement in src/FileMetaData.jl at line 12
    [3.4742][3.561:620]()
    folders=glob("*/2024-06-23/")
    for folder in folders[2:end]
    [3.4742]
    [3.4794]
    folders=glob("*/2024-10-18/")
    for folder in folders[3:end]
  • replacement in src/FileMetaData.jl at line 17
    [3.4855][3.621:707](),[3.707][3.4950:4995](),[3.821][3.4950:4995](),[3.4950][3.4950:4995]()
    CSV.write("/media/david/SSD3/New/pomona_files_20240627.csv", df; append=true)
    catch
    @warn "error with $folder"
    [3.4855]
    [3.4995]
    CSV.write("/media/david/Pomona-4/Pomona-4/pomona_files_20241018.csv", df; append=true)
    catch x
    @warn "$x error with $folder"
  • replacement in src/FileMetaData.jl at line 21
    [3.5003][3.708:737]()
    cd("/media/david/SSD3/New/")
    [3.5003]
    [3.5041]
    cd("/media/david/Pomona-4/Pomona-4/")
  • edit in src/FileMetaData.jl at line 23
    [3.5045][3.5045:5046](),[3.5046][3.738:792]()
    Then go into sublime and change drive New to Pomona-4
  • replacement in src/FileMetaData.jl at line 28
    [3.5143][3.5143:5226]()
    COPY pomona_files FROM '/media/david/Pomona-3/Pomona-3/pomona_files_20231019.csv';
    [3.5143]
    [3.5226]
    COPY pomona_files FROM '/media/david/Pomona-3/Pomona-3/pomona_files_20241018.csv';
  • edit in src/FileMetaData.jl at line 38
    [3.5452]
    [3.5452]
    To restore from backup:
    duckdb my_database.duckdb
    IMPORT DATABASE 'AudioDataBackup_2024-07-10';
  • replacement in src/Clips.jl at line 31
    [3.13208][3.13208:13339]()
    predictions = glob("*/2023-09-11*/preds*")
    predictions = glob("path/to/preds*")
    for file in predictions #[1:6][7:12][13:18][19:24]
    [3.13208]
    [3.13339]
    predictions = glob("*/2024-10-18/preds*")
    for file in predictions
  • edit in src/Clips.jl at line 357
    [17.709]
    [2.1062]
    =#
    #=
    For Kahurangi Data
    # Collect every CSV found two, three and four directory levels below the
    # current directory.
    a=glob("*/*/*.csv")
    b=glob("*/*/*/*.csv")
    c=glob("*/*/*/*/*.csv")
    list=[a ; b ; c]
    # Delete empty preds.csv files: any matched file smaller than 10 bytes is removed.
    # NOTE(review): the globs match every .csv, not only preds files - confirm no
    # other small CSVs live in these folders before running.
    for file in list
    size = stat(file).size
    if size < 10
    println("Deleting $file - $size")
    rm(file)
    end
    end
    ## change date on preds below (no longer required at 25/10/24)
    """
        make_clips_kahurangi(preds_path::String, label::Int = 1)

    Write an audio clip (`.wav`) and a spectrogram image (`.png`) for each entry
    returned by `Skraak.cluster_detections` for the rows of the predictions CSV at
    `preds_path` that carry `label`.

    Assumes it is run from the Kahurangi Data folder. Audio is read from the folder
    that contains `preds_path`; output is written to `Clips_<today's date>/` relative
    to the current working directory.

    # Arguments
    - `preds_path`: path to a predictions CSV.
    - `label`: label value to make clips for (default `1`).

    # Throws
    - `ErrorException` if the CSV contains no rows with `label`.
    """
    function make_clips_kahurangi(preds_path::String, label::Int = 1)
        # Folder holding the predictions file, with a trailing separator.
        # dirname/joinpath keep a leading "/" on absolute paths and cope with a
        # bare file name, which splitting on "/" and re-joining did not.
        pth = joinpath(dirname(preds_path), "")

        function assert_detections_present_(df::DataFrame, label::Int, preds_path)::DataFrame
            label in levels(df.label) && return df
            # Fail with the real reason instead of logging and returning `nothing`,
            # which only surfaced as a conversion error on the `::DataFrame` return.
            error("No detections for label = $label at $preds_path")
        end

        # Load and group data frame by file
        gdf =
        #! format: off
            DataFrame(CSV.File(preds_path)) |>
            x -> Skraak.assert_not_empty(x, preds_path) |>
            x -> Skraak.rename_column!(x, "1.0", "label") |> #can remove now, needs to be label
            x -> assert_detections_present_(x, label, preds_path) |>
            x -> Skraak.filter_positives!(x, label) |>
            Skraak.group_by_file!
        #! format: on

        # Make clip and spectrogram
        for (_, v) in pairs(gdf)
            file_name, extension = Skraak.path_to_file_string(v.file[1])
            start_times = v[!, :start_time] |> sort
            detections = Skraak.cluster_detections(start_times)
            isempty(detections) && continue

            signal, freq = Skraak.wavread("$pth$(file_name).$(extension)")
            # Only recordings sampled above 16 kHz are resampled.
            if freq > 16000
                signal, freq = Skraak.resample_to_8000hz(signal, freq)
            end
            freq = freq |> Float32
            length_signal = length(signal)

            for detection in detections
                st, en = Skraak.calculate_clip_start_end(detection, freq, length_signal)
                # Clip name is file name plus start/end in whole seconds; the folder
                # path is left off because it is not necessary.
                name = "$file_name-$(Int(floor(st/freq)))-$(Int(ceil(en/freq)))"
                f = "Clips_$(today())"
                mkpath(f)
                outfile = "$f/$name"
                sample = signal[Int(st):Int(en)]
                Skraak.wavwrite(sample, "$outfile.wav", Fs = Int(freq))
                image = Skraak.get_image_from_sample(sample, freq)
                PNGFiles.save("$outfile.png", image)
            end
            print(".")
        end
        print(".")
    end
    using Glob, Skraak, CSV, DataFrames, Dates, PNGFiles
    # Option 1: gather prediction files dated 2024-08-29 that sit two, three or
    # four folders below the current directory.
    a=glob("*/*/preds-2024-08-29.csv")
    b=glob("*/*/*/preds-2024-08-29.csv")
    c=glob("*/*/*/*/preds-2024-08-29.csv")
    predictions = [a ; b ; c]
    ## Option 2: only the files dated 2024-10-21 that sit two folders deep.
    ## NOTE(review): if both options are run in order, this assignment replaces the list built above.
    predictions = glob("*/*/preds-2024-10-21.csv")
    # Make clips for each prediction file; a failure is printed and the loop
    # carries on with the next file.
    for file in predictions
    try
    make_clips_kahurangi(file)
    catch x
    println(x)
    end
    end