I use this function to find kiwi from new data gathered on a trip.
I use this function to find kiwi in new data gathered on a trip, and to predict D/F/M/N for images clipped from primary detections.
It works on both audio (wav or flac) and png images.
# see load_model() from train, different input types
"""
    load_model(model_path::String)

Rebuild the classifier network and load saved weights into it.

The entry named `"model_state"` is read from the JLD2 file at `model_path`. A chain is
then built from the first element of an untrained `Metalhead.ResNet(18)`'s layers
followed by a pooling / flatten / dense head, and the saved state is copied into it
with `Flux.loadmodel!`.

# Arguments
- `model_path`: path to a JLD2 file that contains a `"model_state"` entry.

# Returns
The `Flux.Chain` with the saved state loaded.
"""
function load_model(model_path::String)
    model_state = JLD2.load(model_path, "model_state")
    # The width of the output layer is taken from the saved state so the rebuilt head
    # matches it.
    # NOTE(review): this index path presumably reaches the bias of the final Dense
    # layer; it depends on the exact nesting of the saved state — confirm.
    n_classes = length(model_state[1][2][1][3][2])
    backbone = Metalhead.ResNet(18, pretrain = false).layers[1]
    head = Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => n_classes))
    model = Flux.Chain(backbone, head)
    Flux.loadmodel!(model, model_state)
    return model
end
"""
    load_bson(model_path::String)

Load the object stored under the name `model` in the BSON file at `model_path` and
return it.
"""
function load_bson(model_path::String)
    BSON.@load model_path model
    # Return the loaded binding explicitly instead of relying on whatever value the
    # macro call itself evaluates to.
    return model
end
"""
    predict_folder(folder::String, model)

Run predictions for one folder, choosing the input type from the files found in it.

png files take priority: when at least one `*.png` is present, only the images are
predicted on. Otherwise every wav and flac file in `folder` is handed to
`predict_audio_folder`.

# Arguments
- `folder`: folder to search (used as the prefix of the glob patterns).
- `model`: model passed through unchanged to the folder-level predictors.

# Returns
Whatever the selected folder-level predictor returns.
"""
function predict_folder(folder::String, model)
    png_files = glob("$folder/*.png")
    # Images win when both images and audio are present.
    if isempty(png_files)
        # No commas inside the character classes: `[W,w]` would also match a literal ",".
        wav = glob("$folder/*.[Ww][Aa][Vv]")
        flac = glob("$folder/*.flac")
        # If wav and flac are both present, all of them are predicted on.
        audio_files = vcat(wav, flac)
        return predict_audio_folder(audio_files, model, folder)
    else
        return predict_image_folder(png_files, model, folder)
    end
end
# Device selector: `gpu` when `CUDA.functional()` is true, otherwise `cpu`.
# NOTE(review): the loader functions in this file compare against this binding
# (`device == gpu`). As a non-constant global it is untyped for the compiler; consider
# `const` if it is never reassigned — confirm.
device = CUDA.functional() ? gpu : cpu
# Predict from png images
"""
    load_audio_file(file::String)

Read a wav or flac file and return its samples and sample rate.

Files whose extension is `WAV` or `wav` are read with `WAV.wavread`; `flac` files are
read with `load`. Exactly one reader is used per file.

# Arguments
- `file`: path to the audio file; the text after the last `.` is taken as the extension.

# Returns
`(signal, freq)`: the first two values produced by the reader.

# Throws
- `AssertionError` if the extension is not `WAV`, `wav` or `flac`, or if the signal
  that was read is empty.
"""
function load_audio_file(file::String)
    ext = split(file, ".")[end]
    @assert ext in ["WAV", "wav", "flac"] "Unsupported audio file type, requires wav or flac."
    # Only the first two returned values are kept; any further values are ignored.
    signal, freq = if ext in ["WAV", "wav"]
        WAV.wavread(file)
    else
        load(file)
    end
    # Checked on the whole array so a signal with no samples or no channels gives this
    # message rather than an indexing error.
    @assert !isempty(signal) "$file seems to be empty, could it be corrupted?\nYou could delete it, or replace it with a known\ngood version from SD card or backup."
    return signal, freq
end
"""
    predict_image_folder(png_files::Vector{String}, model, folder::String)

Predict a label for every png in `png_files` and write the result to
`preds-<today's date>.csv` inside `folder`.

The csv has two columns: `file` (file name without its directory) and `label`.

# Arguments
- `png_files`: paths of the png images to predict on; must not be empty.
- `model`: model passed to `predict_pngs`.
- `folder`: folder the csv is written to (also used in log messages).

# Returns
The value returned by `CSV.write`.

# Throws
- `AssertionError` if `png_files` is empty.
"""
function predict_image_folder(png_files::Vector{String}, model, folder::String)
    n_files = length(png_files)
    @assert (n_files > 0) "No png files present in $folder"
    @info "$(n_files) png_files in $folder"
    save_path = "$folder/preds-$(today()).csv"
    loader = png_loader(png_files)
    @time preds, files = predict_pngs(model, loader)
    # `basename` strips the directory part whatever path separator is in use.
    file_names = basename.(files)
    df = DataFrame(file = file_names, label = preds)
    return CSV.write(save_path, df)
end
"""
    resample_to_16000hz(signal, freq)

Resample `signal` along its first dimension by the ratio `16000 / freq`.

# Arguments
- `signal`: audio samples; resampling is applied along dimension 1.
- `freq`: sample rate of `signal`.

# Returns
`(resampled_signal, 16000)`.
"""
function resample_to_16000hz(signal, freq)
    resampled = DSP.resample(signal, 16000.0f0 / freq; dims = 1)
    return resampled, 16000
end
# Build a `Flux.DataLoader` (batch size 64, collated, parallel) for png prediction and,
# when `device == gpu`, wrap it in a `CuIterator`.
# NOTE(review): as it stands the `Flux.DataLoader(` call has keyword arguments only —
# no data argument built from `png_files` and no closing parenthesis are visible — and
# the function has no `end`. Restore those lines before relying on this function.
function png_loader(png_files::Vector{String})
loader = Flux.DataLoader(
batchsize = 64,
collate = true,
parallel = true,
device == gpu ? loader = CuIterator(loader) : nothing
return loader
# TODO: change `divisor` to an overlap fraction; check the interaction with audio_loader().
# A `divisor` of 0 currently means no overlap.
"""
    get_images_from_audio(file::String, increment::Int = 5, divisor::Int = 2)

Cut the first channel of an audio file into fixed-length clips and turn each clip into
an image with `get_image_for_inference`.

The file is read with `load_audio_file`; if its sample rate is above 16000 it is first
resampled with `resample_to_16000hz`. With the defaults this gives 5 s clips with a
2.5 s hop.

# Arguments
- `file`: path of the audio file.
- `increment`: clip length in seconds.
- `divisor`: the number of samples shared by consecutive clips is
  `increment * rate / divisor`, truncated to an integer; `0` means no overlap.

# Returns
`(raw_images, n_samples)`: the collection of images and its length.
"""
function get_images_from_audio(file::String, increment::Int = 5, divisor::Int = 2)
    signal, freq = load_audio_file(file)
    if freq > 16000
        signal, freq = resample_to_16000hz(signal, freq)
    end
    rate = convert(Int, freq)
    clip_length = increment * rate
    # The third argument of `DSP.arraysplit` is the length of the region shared by
    # consecutive clips. A zero divisor is handled up front instead of dividing by it.
    overlap = divisor == 0 ? 0 : trunc(Int, rate * increment / divisor)
    clips = DSP.arraysplit(signal[:, 1], clip_length, overlap)
    raw_images = ThreadsX.map(clip -> get_image_for_inference(clip, rate), clips)
    n_samples = length(raw_images)
    return raw_images, n_samples
end

"""
    predict_pngs(m, d)

Run model `m` over every batch yielded by `d` and collect the predictions.

Each item of `d` is destructured as `(x, pth)`: `x` is fed to the model and `pth` is
the matching collection of file paths. The predicted class of each sample is the
`Flux.onecold` of the model output.

# Returns
`(pred, path)`: the predicted class indices and the paths, in iteration order.
"""
function predict_pngs(m, d)
    @info "Predicting..."
    # Typed accumulators rather than `[]`, so the results are not `Vector{Any}`.
    pred = Int[]
    path = String[]
    for (x, pth) in d
        append!(pred, Flux.onecold(m(x)))
        append!(path, pth)
    end
    return pred, path
end
# NOTE(review): the six lines below repeat, token for token, the tail of `audio_loader`
# defined later in this file. They sit outside any visible function header and use
# `images`, which is not assigned anywhere in the preceding lines — this looks like a
# misplaced duplicate fragment; confirm and remove.
start_time = 0:(increment/divisor):(n_samples-1)*(increment/divisor)
end_time = increment:(increment/divisor):(n_samples+1)*(increment/divisor)
time = collect(zip(start_time, end_time))
loader = Flux.DataLoader((images, time), batchsize = n_samples, shuffle = false)
device == gpu ? loader = CuIterator(loader) : nothing #check this works with gpu
return loader
"""
    reshape_images(raw_images, n_samples)

Concatenate all of `raw_images` horizontally and reshape the result to
`(224, 224, 3, n_samples)`.

# Arguments
- `raw_images`: collection of arrays, one per sample, each holding `224 * 224 * 3`
  values.
- `n_samples`: number of samples; becomes the size of the 4th dimension.

# Returns
An array of size `(224, 224, 3, n_samples)`.
"""
function reshape_images(raw_images, n_samples)
    # NOTE(review): if each element is already a 224×224×3 array (rather than a
    # vector), `hcat` joins them along dimension 2 and this reshape does not keep each
    # image contiguous in the 4th dimension — confirm the element shape produced
    # upstream.
    stacked = hcat(raw_images...)
    return reshape(stacked, (224, 224, 3, n_samples))
end
# Predict from audio files
"""
    predict_audio_folder(audio_files::Vector{String}, model, folder::String)

Predict on every file in `audio_files` and collect the results in
`preds-<today's date>.csv` inside `folder`.

The csv is first written with only its header (`file`, `start_time`, `end_time`,
`label`); the rows returned by `predict_audio_file` are then appended one audio file at
a time.

# Arguments
- `audio_files`: paths of the wav / flac files to predict on; must not be empty.
- `model`: model passed to `predict_audio_file`.
- `folder`: folder the csv is written to (also used in log messages).

# Returns
`nothing`.

# Throws
- `AssertionError` if `audio_files` is empty.
"""
function predict_audio_folder(audio_files::Vector{String}, model, folder::String)
    n_files = length(audio_files)
    @assert (n_files > 0) "No wav or flac audio files present in $folder"
    @info "$(n_files) audio_files in $folder"
    # Empty, typed frame: written once so the file starts with the column header.
    header = DataFrame(
        file = String[],
        start_time = Float64[],
        end_time = Float64[],
        label = Int[],
    )
    save_path = "$folder/preds-$(today()).csv"
    CSV.write(save_path, header)
    for file in audio_files
        df = predict_audio_file(file, model)
        CSV.write(save_path, df, append = true)
    end
    return nothing
end
"""
    audio_loader(file::String, increment::Int = 5, divisor::Int = 2)

Build a single-batch `Flux.DataLoader` holding every spectrogram image of `file`
together with the `(start_time, end_time)` of the clip each image was made from.

# Arguments
- `file`: path of the audio file.
- `increment`: clip length in seconds.
- `divisor`: consecutive clips share `increment / divisor` seconds; `0` means no
  overlap. Both values are forwarded unchanged to `get_images_from_audio`.

# Returns
The loader, wrapped in a `CuIterator` when `device == gpu`.
"""
function audio_loader(file::String, increment::Int = 5, divisor::Int = 2)
    raw_images, n_samples = get_images_from_audio(file, increment, divisor)
    images = reshape_images(raw_images, n_samples)
    # `get_images_from_audio` hands `increment / divisor` to the splitter as the
    # overlap between clips, so consecutive clips start `increment - overlap` seconds
    # apart. For the default divisor of 2 this is the same 2.5 s step as before; for
    # other divisors (including 0) the times now line up with the clips and there is
    # always exactly one time pair per image.
    overlap = divisor == 0 ? 0.0 : increment / divisor
    step = increment - overlap
    start_time = (0:(n_samples - 1)) .* step
    end_time = start_time .+ increment
    clip_times = collect(zip(start_time, end_time))
    loader = Flux.DataLoader((images, clip_times), batchsize = n_samples, shuffle = false)
    if device == gpu
        loader = CuIterator(loader) # check this works with gpu
    end
    return loader
end
# Read a csv of labelled detections, make sure a flac copy of every referenced audio
# file exists under `output_path`, then walk the labels grouped per file.
#
# Columns required in the csv: location, file, start_time, end_time. If a `box` column
# is present, start_time and end_time are derived from its first and last element.
#
# NOTE(review): several things in this block cannot work as shown —
#   * the default value of `output_path` is a bare path, not a quoted string literal;
#   * `b` and `duration` are used but never assigned in the visible lines;
#   * `f.kiwi` is read after `select!` has reduced the frame to key/start_time/end_time;
#   * none of the `if` / `for` blocks, nor the function itself, has an `end` in view.
# Lines appear to be missing; restore them before use.
function move_files_to_dataset(input_file::String, output_path::String=/media/david/SSD2/PrimaryDataset/kiwi_set/)
df = DataFrame(CSV.File(input_file))
@assert nrow(df) > 0 "Empty csv therefore dataframe"
if "box" in names(df)
# NOTE(review): `eval(Meta.parse(...))` executes whatever text is in the csv cell.
# If the csv can come from an untrusted source, parse the values explicitly instead.
@transform!(df, @byrow :start_time = first(eval(Meta.parse(:box))) )
@transform!(df, @byrow :end_time = last(eval(Meta.parse(:box))) )
for col_name in ["location", "file", "start_time", "end_time"]
@assert col_name in names(df) "Column $col_name not present in csv"
# Only these four columns are kept from here on.
select!(df, :location, :file, :start_time, :end_time)
# key = "<location>-<file>"
@transform!(df, @byrow :key = :location * "-" * :file )
k=levels(df.key) #Vector{String}:
for item in k
# Second-to-last dot-separated piece of the key, used as the sub-folder name.
fldr = split(item, ".")[end-1]
outf = replace(item, ".wav" => ".flac", ".WAV" => ".flac")
# Only convert when the flac copy does not exist yet.
if !isfile("$output_path$(fldr)/$outf")
# NOTE(review): takes the first two "-"-separated pieces of the key; a location or
# file name that itself contains "-" would be split in the wrong place — confirm.
l,f=split(item, "-")
# NOTE(review): `b` is not defined in the visible lines (presumably the result of a
# file search using `l` and `f`). The assertion checks for exactly one match, yet `b`
# itself rather than its single element is passed on — confirm.
@assert length(b) == 1
signal, freq = Skraak.load_audio_file(b)
save("$output_path$(fldr)/$outf", signal, freq)
@info "$(length(levels(df.key))) files"
@info "$(length(df.key)) labels"
select!(df, :key, :start_time, :end_time)
gdf = groupby(df, :key)
for f in gdf
file = first(f.key) |> x -> replace(x, ".wav"=>".flac", ".WAV"=>".flac")
folder = split(file, ".")[1]
# NOTE(review): no `kiwi` column survives the `select!` calls above, and `duration`
# is not assigned in the visible lines.
kiwi = f.kiwi
@info (folder, duration, kiwi)
* file(String),start_time,end_time,label(Int) (where start_time and end_time are in seconds from the start of the wav file)
> It is better __not__ to have everything in 2 big folders: 100_000 files in a folder on a FAT32 removable drive will rapidly grind to a standstill.
4. Train a Resnet18 model, either pretrained on Imagenet, or preferably the pretrained Skraak Kiwi model, which is currently trained on 7_400_000 images.
Skraak will try to find png images first, in the folders covered by the glob pattern. If no pngs are found it will predict on wav or flac files, using 5 second audio clips converted to 224x224 pixel RGB spectrogram images, with a 2.5 second hop.
