EDYR5C55YKPEMJOS4O6YEUK5JYUWSX4NTPQGG4GLH5QL2O62GTPQC I use this function to find kiwi from new data gathered on a trip.
I use this function to find kiwi from new data gathered on a trip, and to predict D/F/M/N for images clipped from primary detections. It works on both audio (wav or flac) and png images.
# see load_model() from train, different input types
"""
    load_model(model_path::String)

Rebuild the classifier and restore its saved weights.

Reads the `"model_state"` entry from the JLD2 file at `model_path`, builds a
non-pretrained ResNet18 backbone followed by a pooling/flatten/`Dense` head, and
loads the saved state into it with `Flux.loadmodel!`.

The number of output classes is taken from `length(model_state[1][2][1][3][2])`.
NOTE(review): presumably this indexes the bias of the final `Dense` layer in the
saved state; confirm against the training code.
"""
function load_model(model_path::String)
    state = JLD2.load(model_path, "model_state")
    n_classes = length(state[1][2][1][3][2])
    backbone = Metalhead.ResNet(18, pretrain = false).layers
    head = Flux.Chain(
        AdaptiveMeanPool((1, 1)),
        Flux.flatten,
        Dense(512 => n_classes),
    )
    # Only the first stage of the Metalhead layers is kept; the head replaces the rest.
    model = Flux.Chain(backbone[1], head)
    Flux.loadmodel!(model, state)
    return model
end

#=
function load_bson(model_path::String)
    BSON.@load model_path model
end
=#

"""
    predict_folder(folder::String, model)

Run `model` over the contents of `folder`.

Globs wav (any letter case), flac and png files directly under `folder`. If any
png files are found they take priority and are passed to `predict_image_folder`;
otherwise all wav and flac files found are passed to `predict_audio_folder`.
"""
function predict_folder(folder::String, model)
    wav_files = glob("$folder/*.[W,w][A,a][V,v]")
    flac_files = glob("$folder/*.flac")
    # When wav and flac are both present, predictions are made on all of them.
    audio_files = vcat(wav_files, flac_files)
    png_files = glob("$folder/*.png")
    # Images win whenever both images and audio are present.
    return if isempty(png_files)
        predict_audio_folder(audio_files, model, folder)
    else
        predict_image_folder(png_files, model, folder)
    end
end

# Move data to the GPU when CUDA is usable, otherwise stay on the CPU.
device = if CUDA.functional()
    gpu
else
    cpu
end

# Predict from png images
function load_audio_file(file::String)ext = split(file, ".")[end]@assert ext in ["WAV", "wav", "flac"] "Unsupported audio file type, requires wav or flac."if ext in ["WAV", "wav"]signal, freq = WAV.wavread(file)elsesignal, freq = load(file)end@assert !isempty(signal[:, 1]) "$file seems to be empty, could it be corrupted?\nYou could delete it, or replace it with a known\ngood version from SD card or backup."return signal, freq
function predict_image_folder(png_files::Vector{String}, model, folder::String)l = length(png_files)@assert (l > 0) "No png files present in $folder"@info "$(l) png_files in $folder"save_path = "$folder/preds-$(today()).csv"loader = png_loader(png_files)@time preds, files = predict_pngs(model, loader)f = split.(files, "/") |> x -> last.(x)df = DataFrame(file = f, label = preds)CSV.write("$save_path", df)
function resample_to_16000hz(signal, freq)signal = DSP.resample(signal, 16000.0f0 / freq; dims = 1)freq = 16000return signal, freq
function png_loader(png_files::Vector{String})loader = Flux.DataLoader(PredictImageContainer(png_files);batchsize = 64,collate = true,parallel = true,)device == gpu ? loader = CuIterator(loader) : nothingreturn loader
# need to change divisor to a overlap fraction, chech interaction with audioloader()# if divisor is 0, then no overlap atmfunction get_images_from_audio(file::String, increment::Int = 5, divisor::Int = 2) #5s sample, 2.5s hopsignal, freq = load_audio_file(file)if freq > 16000signal, freq = resample_to_16000hz(signal, freq)
function predict_pngs(m, d)@info "Predicting..."pred = []path = []for (x, pth) in dp = Flux.onecold(m(x))append!(pred, p)append!(path, pth)
f = convert(Int, freq)inc = increment * f#hop = f * increment ÷ divisor #need guarunteed Int, maybe not anymore, refactorhop = f * increment / divisor |> x -> x == Inf ? 0 : trunc(Int, x)split_signal = DSP.arraysplit(signal[:, 1], inc, hop)raw_images = ThreadsX.map(x -> get_image_for_inference(x, f), split_signal)n_samples = length(raw_images)return raw_images, n_samples
return pred, path
start_time = 0:(increment/divisor):(n_samples-1)*(increment/divisor)end_time = increment:(increment/divisor):(n_samples+1)*(increment/divisor)time = collect(zip(start_time, end_time))loader = Flux.DataLoader((images, time), batchsize = n_samples, shuffle = false)device == gpu ? loader = CuIterator(loader) : nothing #check this works with gpureturn loaderendfunction reshape_images(raw_images, n_samples)images =#! format: offhcat(raw_images...) |>x -> reshape(x, (224, 224, 3, n_samples))#! format: onreturn images
# Predict from audio filesfunction predict_audio_folder(audio_files::Vector{String}, model, folder::String)l = length(audio_files)@assert (l > 0) "No wav or flac audio files present in $folder"@info "$(l) audio_files in $folder"df = DataFrame(file = String[],start_time = Float64[],end_time = Float64[],label = Int[],)save_path = "$folder/preds-$(today()).csv"CSV.write("$save_path", df)for file in audio_filesdf = predict_audio_file(file, model)CSV.write("$save_path", df, append = true)end
"""
    predict_folder(folder::String, model)

Run `model` over the contents of `folder`.

Globs wav (any letter case), flac and png files directly under `folder`. If any
png files are found they take priority and are passed to `predict_image_folder`;
otherwise all wav and flac files found are passed to `predict_audio_folder`.
"""
function predict_folder(folder::String, model)
    wav_files = glob("$folder/*.[W,w][A,a][V,v]")
    flac_files = glob("$folder/*.flac")
    # When wav and flac are both present, predictions are made on all of them.
    audio_files = vcat(wav_files, flac_files)
    png_files = glob("$folder/*.png")
    # Images win whenever both images and audio are present.
    return if isempty(png_files)
        predict_audio_folder(audio_files, model, folder)
    else
        predict_image_folder(png_files, model, folder)
    end
end
function audio_loader(file::String, increment::Int = 5, divisor::Int = 2)raw_images, n_samples = get_images_from_audio(file::String, increment, divisor)images = reshape_images(raw_images, n_samples)
"""
    predict_audio_folder(audio_files::Vector{String}, model, folder::String)

Predict on every file in `audio_files` and collect the results in one csv.

Writes `preds-<today's date>.csv` into `folder`. The file is first created with
only the header columns `file`, `start_time`, `end_time`, `label`; the table
returned by `predict_audio_file` for each audio file is then appended to it.

Fails an assertion when `audio_files` is empty.
"""
function predict_audio_folder(audio_files::Vector{String}, model, folder::String)
    n_files = length(audio_files)
    @assert (n_files > 0) "No wav or flac audio files present in $folder"
    @info "$(n_files) audio_files in $folder"

    # An empty, typed table is written first so the csv always carries a header row.
    header = DataFrame(
        file = String[],
        start_time = Float64[],
        end_time = Float64[],
        label = Int[],
    )
    save_path = "$folder/preds-$(today()).csv"
    CSV.write(save_path, header)

    # Append per file so results are on disk as soon as each file is processed.
    for audio_file in audio_files
        predictions = predict_audio_file(audio_file, model)
        CSV.write(save_path, predictions, append = true)
    end
end
start_time = 0:(increment/divisor):(n_samples-1)*(increment/divisor)end_time = increment:(increment/divisor):(n_samples+1)*(increment/divisor)time = collect(zip(start_time, end_time))
function predict_image_folder(png_files::Vector{String}, model, folder::String)l = length(png_files)@assert (l > 0) "No png files present in $folder"@info "$(l) png_files in $folder"save_path = "$folder/preds-$(today()).csv"loader = png_loader(png_files)@time preds, files = predict_pngs(model, loader)f = split.(files, "/") |> x -> last.(x)df = DataFrame(file = f, label = preds)CSV.write("$save_path", df)
loader = Flux.DataLoader((images, time), batchsize = n_samples, shuffle = false)device == gpu ? loader = CuIterator(loader) : nothing #check this works with gpureturn loader
function png_loader(png_files::Vector{String})loader = Flux.DataLoader(PredictImageContainer(png_files);batchsize = 64,collate = true,parallel = true,)device == gpu ? loader = CuIterator(loader) : nothingreturn loader
function reshape_images(raw_images, n_samples)images =#! format: offhcat(raw_images...) |>x -> reshape(x, (224, 224, 3, n_samples))#! format: onreturn images
function predict_pngs(m, d)@info "Predicting..."pred = []path = []for (x, pth) in dp = Flux.onecold(m(x))append!(pred, p)append!(path, pth)
# need to change divisor to a overlap fraction, chech interaction with audioloader()# if divisor is 0, then no overlap atmfunction get_images_from_audio(file::String, increment::Int = 5, divisor::Int = 2) #5s sample, 2.5s hopsignal, freq = load_audio_file(file)if freq > 16000signal, freq = resample_to_16000hz(signal, freq)
return pred, path
f = convert(Int, freq)inc = increment * f#hop = f * increment ÷ divisor #need guarunteed Int, maybe not anymore, refactorhop = f * increment / divisor |> x -> x == Inf ? 0 : trunc(Int, x)split_signal = DSP.arraysplit(signal[:, 1], inc, hop)raw_images = ThreadsX.map(x -> get_image_for_inference(x, f), split_signal)n_samples = length(raw_images)return raw_images, n_samples
# see load_model() from train, different input typesfunction load_model(model_path::String)model_state = JLD2.load(model_path, "model_state")model_classes = length(model_state[1][2][1][3][2])f = Metalhead.ResNet(18, pretrain = false).layersl = Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => model_classes))model = Flux.Chain(f[1], l)Flux.loadmodel!(model, model_state)return model
function load_audio_file(file::String)ext = split(file, ".")[end]@assert ext in ["WAV", "wav", "flac"] "Unsupported audio file type, requires wav or flac."if ext in ["WAV", "wav"]signal, freq = WAV.wavread(file)elsesignal, freq = load(file)end@assert !isempty(signal[:, 1]) "$file seems to be empty, could it be corrupted?\nYou could delete it, or replace it with a known\ngood version from SD card or backup."return signal, freq
function move_files_to_dataset(input_file::String, output_path::String=/media/david/SSD2/PrimaryDataset/kiwi_set/)df = DataFrame(CSV.File(input_file))@assert nrow(df) > 0 "Empty csv therefore dataframe"if "box" in names(df)@transform!(df, @byrow :start_time = first(eval(Meta.parse(:box))) )@transform!(df, @byrow :end_time = last(eval(Meta.parse(:box))) )endfor col_name in ["location", "file", "start_time", "end_time"]@assert col_name in names(df) "Column $col_name not present in csv"endselect!(df, :location, :file, :start_time, :end_time)@transform!(df, @byrow :key = :location * "-" * :file )k=levels(df.key) #Vector{String}:for item in kfldr = split(item, ".")[end-1]outf = replace(item, ".wav" => ".flac", ".WAV" => ".flac")if !isfile("$output_path$(fldr)/$outf")println(item)l,f=split(item, "-")b=glob("$l/*/$f")@assert length(b) == 1mkpath("$fldr")signal, freq = Skraak.load_audio_file(b)save("$output_path$(fldr)/$outf", signal, freq)
function move_files_to_dataset(input_file::String,output_path::String = "/media/david/SSD2/PrimaryDataset/kiwi_set/",)df = DataFrame(CSV.File(input_file))@assert nrow(df) > 0 "Empty csv therefore dataframe"if "box" in names(df)@transform!(df, @byrow :start_time = first(eval(Meta.parse(:box))))@transform!(df, @byrow :end_time = last(eval(Meta.parse(:box))))endfor col_name in ["location", "file", "start_time", "end_time"]@assert col_name in names(df) "Column $col_name not present in csv"endselect!(df, :location, :file, :start_time, :end_time)@transform!(df, @byrow :key = :location * "-" * :file)k = levels(df.key) #Vector{String}:for item in kfldr = split(item, ".")[end-1]outf = replace(item, ".wav" => ".flac", ".WAV" => ".flac")if !isfile("$output_path$(fldr)/$outf")println(item)l, f = split(item, "-")b = glob("$l/*/$f")@assert length(b) == 1mkpath("$fldr")signal, freq = Skraak.load_audio_file(b)save("$output_path$(fldr)/$outf", signal, freq)end
@info "$(length(levels(df.key))) files"@info "$(length(df.key)) labels"select!(df, :key, :start_time, :end_time)gdf = groupby(df, :key)for f in gdffile = first(f.key) |> x -> replace(x, ".wav"=>".flac", ".WAV"=>".flac")folder = split(file, ".")[1]kiwi = f.kiwi@info (folder, duration, kiwi)
@info "$(length(levels(df.key))) files"@info "$(length(df.key)) labels"select!(df, :key, :start_time, :end_time)gdf = groupby(df, :key)for f in gdffile = first(f.key) |> x -> replace(x, ".wav" => ".flac", ".WAV" => ".flac")folder = split(file, ".")[1]
#signal, freq = wavread("kiwi_set_2023-11-13/$folder/$file")signal, freq = Skraak.load_audio_file("kiwi_set_2023-11-13/$folder/$file")length_signal = length(signal)duration = length_signal / freqmkpath("kiwi_set_2023-11-13/$folder/K")mkpath("kiwi_set_2023-11-13/$folder/N")ldf = DataFrame(second=1:duration, kiwi=false)for clip in kiwiclip[1] > 0 ? st = clip[1] : st = 1clip[2] <= duration ? nd = clip[2] : nd = durationldf.kiwi[st:nd] .= trueendstart = 1while start+4 <= durationwdf = ldf[start:start+4, :]#make imagest, en = calculate_clip(start, start+4, freq, length_signal)sample = signal[Int(st):Int(en)]plot = get_image_from_sample(sample, freq);if true in levels(wdf.kiwi)#save to K folder#savefig(plot, "kiwi_set-2023-09-07/$folder/K/$folder-$start-$(start+4).png")PNGFiles.save("kiwi_set_2023-11-13/$folder/K/$folder-$start-$(start+4).png", plot)start += 2else#save to N folder#savefig(plot, "kiwi_set-2023-09-07/$folder/N/$folder-$start-$(start+4).png")PNGFiles.save("kiwi_set_2023-11-13/$folder/N/$folder-$start-$(start+4).png", plot)start += 5endendif start+4 > durationwdf = df[duration-4:duration, :]#make imagest, en = calculate_clip(duration-4, duration, freq, length_signal)sample = signal[Int(st):Int(en)]plot = get_image_from_sample(sample, freq);#save to correct foldertrue in levels(wdf.kiwi) ? l="K" : l="N"#savefig(plot, "kiwi_set-2023-09-07/$folder/$l/$folder-$(duration-4)-$duration.png")PNGFiles.save("kiwi_set_2023-11-13/$folder/$l/$folder-$(duration-4)-$duration.png", plot)endend
kiwi = f.kiwi@info (folder, duration, kiwi)
mkpath("kiwi_set_2023-11-13/$folder/K")mkpath("kiwi_set_2023-11-13/$folder/N")ldf = DataFrame(second = 1:duration, kiwi = false)for clip in kiwiclip[1] > 0 ? st = clip[1] : st = 1clip[2] <= duration ? nd = clip[2] : nd = durationldf.kiwi[st:nd] .= trueendstart = 1while start + 4 <= durationwdf = ldf[start:start+4, :]#make imagest, en = calculate_clip(start, start + 4, freq, length_signal)sample = signal[Int(st):Int(en)]plot = get_image_from_sample(sample, freq)if true in levels(wdf.kiwi)#save to K folder#savefig(plot, "kiwi_set-2023-09-07/$folder/K/$folder-$start-$(start+4).png")PNGFiles.save("kiwi_set_2023-11-13/$folder/K/$folder-$start-$(start+4).png",plot,)start += 2else#save to N folder#savefig(plot, "kiwi_set-2023-09-07/$folder/N/$folder-$start-$(start+4).png")PNGFiles.save("kiwi_set_2023-11-13/$folder/N/$folder-$start-$(start+4).png",plot,)start += 5endendif start + 4 > durationwdf = df[duration-4:duration, :]#make imagest, en = calculate_clip(duration - 4, duration, freq, length_signal)sample = signal[Int(st):Int(en)]plot = get_image_from_sample(sample, freq)#save to correct foldertrue in levels(wdf.kiwi) ? l = "K" : l = "N"#savefig(plot, "kiwi_set-2023-09-07/$folder/$l/$folder-$(duration-4)-$duration.png")PNGFiles.save("kiwi_set_2023-11-13/$folder/$l/$folder-$(duration-4)-$duration.png",plot,)endend
* file(String),start_time,end_time,label(Int) (where start_time and end_time are in seconds from the start of the wav file)
* file(String),start_time,end_time,label(Integer) (where start_time and end_time are in seconds from the start of the wav file)
> It is better __not__ to have everything in 2 big folders: 100_000 files in a folder on a FAT32 removable drive will rapidly grind to a standstill.
> It is better __not__ to have everything in big folders: 100_000 files in a folder on a FAT32 removable drive will rapidly grind to a standstill.
4. Train a Resnet18 model, either pretrained on Imagenet, or preferably the pretrained Skraak Kiwi model, which is currently trained on 7_400_000 images.
4. Train a Resnet18 model, either pretrained on Imagenet, or preferably the pretrained Skraak Kiwi model, which is currently trained on 7_700_000 images.
Skraak will try to find png images first, in the folders covered by the glob pattern. If there are no png's found it will predict on wav or flac files, using 5 second audio clips, converted to 224x224 pixel RGB spectrogram images, with a 2.5 second hop.
Skraak will try to find png images first, in the folders covered by the glob pattern. If there are no png's found it will predict on wav or flac files, using 5 second audio clips, converted to 224x224 pixel RGB spectrogram images, with a 2.5 second hop.