EDYR5C55YKPEMJOS4O6YEUK5JYUWSX4NTPQGG4GLH5QL2O62GTPQC
I use this function to find kiwi from new data gathered on a trip.
I use this function to find kiwi from new data gathered on a trip, and to predict D/F/M/N for images clipped from primary detections.
It works on both audio (wav or flac) and png images.
# Rebuild the ResNet18-based classifier and restore previously saved weights.
# See load_model() from train, which takes different input types.
#
# model_path: path to a JLD2 file that contains a "model_state" entry.
# Returns the Flux model after the saved state has been loaded into it.
function load_model(model_path::String)
    state = JLD2.load(model_path, "model_state")
    # Number of output classes is taken from the length of one entry of the
    # saved state (presumably the final Dense layer's bias - TODO confirm).
    n_classes = length(state[1][2][1][3][2])
    # Only the first element of the ResNet's layers is reused; the
    # classification head is rebuilt so that it has n_classes outputs.
    backbone = Metalhead.ResNet(18, pretrain = false).layers[1]
    head = Flux.Chain(
        AdaptiveMeanPool((1, 1)),
        Flux.flatten,
        Dense(512 => n_classes),
    )
    net = Flux.Chain(backbone, head)
    Flux.loadmodel!(net, state)
    return net
end
#=
function load_bson(model_path::String)
BSON.@load model_path model
end
=#
# Run prediction on the contents of one folder.
#
# folder: directory to search (non-recursively) for png, wav/WAV and flac files.
# model:  model handed on unchanged to the image or audio prediction routine.
#
# png images take priority: audio is only predicted on when the folder holds
# no png files at all.
function predict_folder(folder::String, model)
    # If wav and flac are both present, all of them are predicted on.
    recordings = vcat(glob("$folder/*.[W,w][A,a][V,v]"), glob("$folder/*.flac"))
    images = glob("$folder/*.png")
    !isempty(images) && return predict_image_folder(images, model, folder)
    return predict_audio_folder(recordings, model, folder)
end
# Global device selector: `gpu` when CUDA.functional() is true, otherwise `cpu`
# (presumably Flux's gpu/cpu movers - TODO confirm). The loader builders
# compare `device == gpu` to decide whether to wrap a loader in a CuIterator.
device = CUDA.functional() ? gpu : cpu
# Predict from png images
function load_audio_file(file::String)
ext = split(file, ".")[end]
@assert ext in ["WAV", "wav", "flac"] "Unsupported audio file type, requires wav or flac."
if ext in ["WAV", "wav"]
signal, freq = WAV.wavread(file)
else
signal, freq = load(file)
end
@assert !isempty(signal[:, 1]) "$file seems to be empty, could it be corrupted?\nYou could delete it, or replace it with a known\ngood version from SD card or backup."
return signal, freq
function predict_image_folder(png_files::Vector{String}, model, folder::String)
l = length(png_files)
@assert (l > 0) "No png files present in $folder"
@info "$(l) png_files in $folder"
save_path = "$folder/preds-$(today()).csv"
loader = png_loader(png_files)
@time preds, files = predict_pngs(model, loader)
f = split.(files, "/") |> x -> last.(x)
df = DataFrame(file = f, label = preds)
CSV.write("$save_path", df)
function resample_to_16000hz(signal, freq)
signal = DSP.resample(signal, 16000.0f0 / freq; dims = 1)
freq = 16000
return signal, freq
function png_loader(png_files::Vector{String})
loader = Flux.DataLoader(
PredictImageContainer(png_files);
batchsize = 64,
collate = true,
parallel = true,
)
device == gpu ? loader = CuIterator(loader) : nothing
return loader
# need to change divisor to an overlap fraction, check interaction with audio_loader()
# if divisor is 0, then no overlap atm
function get_images_from_audio(file::String, increment::Int = 5, divisor::Int = 2) #5s sample, 2.5s hop
signal, freq = load_audio_file(file)
if freq > 16000
signal, freq = resample_to_16000hz(signal, freq)
function predict_pngs(m, d)
@info "Predicting..."
pred = []
path = []
for (x, pth) in d
p = Flux.onecold(m(x))
append!(pred, p)
append!(path, pth)
f = convert(Int, freq)
inc = increment * f
#hop = f * increment ÷ divisor #need guaranteed Int, maybe not anymore, refactor
hop = f * increment / divisor |> x -> x == Inf ? 0 : trunc(Int, x)
split_signal = DSP.arraysplit(signal[:, 1], inc, hop)
raw_images = ThreadsX.map(x -> get_image_for_inference(x, f), split_signal)
n_samples = length(raw_images)
return raw_images, n_samples
return pred, path
start_time = 0:(increment/divisor):(n_samples-1)*(increment/divisor)
end_time = increment:(increment/divisor):(n_samples+1)*(increment/divisor)
time = collect(zip(start_time, end_time))
loader = Flux.DataLoader((images, time), batchsize = n_samples, shuffle = false)
device == gpu ? loader = CuIterator(loader) : nothing #check this works with gpu
return loader
end
function reshape_images(raw_images, n_samples)
images =
#! format: off
hcat(raw_images...) |>
x -> reshape(x, (224, 224, 3, n_samples))
#! format: on
return images
# Predict from audio files
function predict_audio_folder(audio_files::Vector{String}, model, folder::String)
l = length(audio_files)
@assert (l > 0) "No wav or flac audio files present in $folder"
@info "$(l) audio_files in $folder"
df = DataFrame(
file = String[],
start_time = Float64[],
end_time = Float64[],
label = Int[],
)
save_path = "$folder/preds-$(today()).csv"
CSV.write("$save_path", df)
for file in audio_files
df = predict_audio_file(file, model)
CSV.write("$save_path", df, append = true)
end
# Run prediction on the contents of one folder.
#
# folder: directory to search (non-recursively) for png, wav/WAV and flac files.
# model:  model handed on unchanged to the image or audio prediction routine.
#
# png images take priority: audio is only predicted on when the folder holds
# no png files at all.
function predict_folder(folder::String, model)
    # If wav and flac are both present, all of them are predicted on.
    recordings = vcat(glob("$folder/*.[W,w][A,a][V,v]"), glob("$folder/*.flac"))
    images = glob("$folder/*.png")
    !isempty(images) && return predict_image_folder(images, model, folder)
    return predict_audio_folder(recordings, model, folder)
end
function audio_loader(file::String, increment::Int = 5, divisor::Int = 2)
raw_images, n_samples = get_images_from_audio(file::String, increment, divisor)
images = reshape_images(raw_images, n_samples)
# Predict on every audio file in `audio_files` and collect the results in a
# single csv, "$folder/preds-<today's date>.csv".
#
# audio_files: paths of the wav/flac files to predict on; must not be empty.
# model:       model passed through to predict_audio_file for each file.
# folder:      directory the csv is written into (also used in log messages).
#
# The csv is first written with only the header row
# (file, start_time, end_time, label); each file's predictions are then
# appended as soon as they are available. Returns nothing.
function predict_audio_folder(audio_files::Vector{String}, model, folder::String)
    n_files = length(audio_files)
    @assert (n_files > 0) "No wav or flac audio files present in $folder"
    @info "$(n_files) audio_files in $folder"
    save_path = "$folder/preds-$(today()).csv"
    # An empty, typed table is used to lay down the column header up front.
    header = DataFrame(
        file = String[],
        start_time = Float64[],
        end_time = Float64[],
        label = Int[],
    )
    CSV.write(save_path, header)
    foreach(audio_files) do audio_file
        CSV.write(save_path, predict_audio_file(audio_file, model), append = true)
    end
    return nothing
end
start_time = 0:(increment/divisor):(n_samples-1)*(increment/divisor)
end_time = increment:(increment/divisor):(n_samples+1)*(increment/divisor)
time = collect(zip(start_time, end_time))
function predict_image_folder(png_files::Vector{String}, model, folder::String)
l = length(png_files)
@assert (l > 0) "No png files present in $folder"
@info "$(l) png_files in $folder"
save_path = "$folder/preds-$(today()).csv"
loader = png_loader(png_files)
@time preds, files = predict_pngs(model, loader)
f = split.(files, "/") |> x -> last.(x)
df = DataFrame(file = f, label = preds)
CSV.write("$save_path", df)
loader = Flux.DataLoader((images, time), batchsize = n_samples, shuffle = false)
device == gpu ? loader = CuIterator(loader) : nothing #check this works with gpu
return loader
function png_loader(png_files::Vector{String})
loader = Flux.DataLoader(
PredictImageContainer(png_files);
batchsize = 64,
collate = true,
parallel = true,
)
device == gpu ? loader = CuIterator(loader) : nothing
return loader
function reshape_images(raw_images, n_samples)
images =
#! format: off
hcat(raw_images...) |>
x -> reshape(x, (224, 224, 3, n_samples))
#! format: on
return images
function predict_pngs(m, d)
@info "Predicting..."
pred = []
path = []
for (x, pth) in d
p = Flux.onecold(m(x))
append!(pred, p)
append!(path, pth)
# need to change divisor to an overlap fraction, check interaction with audio_loader()
# if divisor is 0, then no overlap atm
function get_images_from_audio(file::String, increment::Int = 5, divisor::Int = 2) #5s sample, 2.5s hop
signal, freq = load_audio_file(file)
if freq > 16000
signal, freq = resample_to_16000hz(signal, freq)
return pred, path
f = convert(Int, freq)
inc = increment * f
#hop = f * increment ÷ divisor #need guaranteed Int, maybe not anymore, refactor
hop = f * increment / divisor |> x -> x == Inf ? 0 : trunc(Int, x)
split_signal = DSP.arraysplit(signal[:, 1], inc, hop)
raw_images = ThreadsX.map(x -> get_image_for_inference(x, f), split_signal)
n_samples = length(raw_images)
return raw_images, n_samples
# see load_model() from train, different input types
function load_model(model_path::String)
model_state = JLD2.load(model_path, "model_state")
model_classes = length(model_state[1][2][1][3][2])
f = Metalhead.ResNet(18, pretrain = false).layers
l = Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => model_classes))
model = Flux.Chain(f[1], l)
Flux.loadmodel!(model, model_state)
return model
function load_audio_file(file::String)
ext = split(file, ".")[end]
@assert ext in ["WAV", "wav", "flac"] "Unsupported audio file type, requires wav or flac."
if ext in ["WAV", "wav"]
signal, freq = WAV.wavread(file)
else
signal, freq = load(file)
end
@assert !isempty(signal[:, 1]) "$file seems to be empty, could it be corrupted?\nYou could delete it, or replace it with a known\ngood version from SD card or backup."
return signal, freq
function move_files_to_dataset(input_file::String, output_path::String=/media/david/SSD2/PrimaryDataset/kiwi_set/)
df = DataFrame(CSV.File(input_file))
@assert nrow(df) > 0 "Empty csv therefore dataframe"
if "box" in names(df)
@transform!(df, @byrow :start_time = first(eval(Meta.parse(:box))) )
@transform!(df, @byrow :end_time = last(eval(Meta.parse(:box))) )
end
for col_name in ["location", "file", "start_time", "end_time"]
@assert col_name in names(df) "Column $col_name not present in csv"
end
select!(df, :location, :file, :start_time, :end_time)
@transform!(df, @byrow :key = :location * "-" * :file )
k=levels(df.key) #Vector{String}:
for item in k
fldr = split(item, ".")[end-1]
outf = replace(item, ".wav" => ".flac", ".WAV" => ".flac")
if !isfile("$output_path$(fldr)/$outf")
println(item)
l,f=split(item, "-")
b=glob("$l/*/$f")
@assert length(b) == 1
mkpath("$fldr")
signal, freq = Skraak.load_audio_file(b)
save("$output_path$(fldr)/$outf", signal, freq)
function move_files_to_dataset(
input_file::String,
output_path::String = "/media/david/SSD2/PrimaryDataset/kiwi_set/",
)
df = DataFrame(CSV.File(input_file))
@assert nrow(df) > 0 "Empty csv therefore dataframe"
if "box" in names(df)
@transform!(df, @byrow :start_time = first(eval(Meta.parse(:box))))
@transform!(df, @byrow :end_time = last(eval(Meta.parse(:box))))
end
for col_name in ["location", "file", "start_time", "end_time"]
@assert col_name in names(df) "Column $col_name not present in csv"
end
select!(df, :location, :file, :start_time, :end_time)
@transform!(df, @byrow :key = :location * "-" * :file)
k = levels(df.key) #Vector{String}:
for item in k
fldr = split(item, ".")[end-1]
outf = replace(item, ".wav" => ".flac", ".WAV" => ".flac")
if !isfile("$output_path$(fldr)/$outf")
println(item)
l, f = split(item, "-")
b = glob("$l/*/$f")
@assert length(b) == 1
mkpath("$fldr")
signal, freq = Skraak.load_audio_file(b)
save("$output_path$(fldr)/$outf", signal, freq)
end
@info "$(length(levels(df.key))) files"
@info "$(length(df.key)) labels"
select!(df, :key, :start_time, :end_time)
gdf = groupby(df, :key)
for f in gdf
file = first(f.key) |> x -> replace(x, ".wav"=>".flac", ".WAV"=>".flac")
folder = split(file, ".")[1]
kiwi = f.kiwi
@info (folder, duration, kiwi)
@info "$(length(levels(df.key))) files"
@info "$(length(df.key)) labels"
select!(df, :key, :start_time, :end_time)
gdf = groupby(df, :key)
for f in gdf
file = first(f.key) |> x -> replace(x, ".wav" => ".flac", ".WAV" => ".flac")
folder = split(file, ".")[1]
#signal, freq = wavread("kiwi_set_2023-11-13/$folder/$file")
signal, freq = Skraak.load_audio_file("kiwi_set_2023-11-13/$folder/$file")
length_signal = length(signal)
duration = length_signal / freq
mkpath("kiwi_set_2023-11-13/$folder/K")
mkpath("kiwi_set_2023-11-13/$folder/N")
ldf = DataFrame(second=1:duration, kiwi=false)
for clip in kiwi
clip[1] > 0 ? st = clip[1] : st = 1
clip[2] <= duration ? nd = clip[2] : nd = duration
ldf.kiwi[st:nd] .= true
end
start = 1
while start+4 <= duration
wdf = ldf[start:start+4, :]
#make image
st, en = calculate_clip(start, start+4, freq, length_signal)
sample = signal[Int(st):Int(en)]
plot = get_image_from_sample(sample, freq);
if true in levels(wdf.kiwi)
#save to K folder
#savefig(plot, "kiwi_set-2023-09-07/$folder/K/$folder-$start-$(start+4).png")
PNGFiles.save("kiwi_set_2023-11-13/$folder/K/$folder-$start-$(start+4).png", plot)
start += 2
else
#save to N folder
#savefig(plot, "kiwi_set-2023-09-07/$folder/N/$folder-$start-$(start+4).png")
PNGFiles.save("kiwi_set_2023-11-13/$folder/N/$folder-$start-$(start+4).png", plot)
start += 5
end
end
if start+4 > duration
wdf = df[duration-4:duration, :]
#make image
st, en = calculate_clip(duration-4, duration, freq, length_signal)
sample = signal[Int(st):Int(en)]
plot = get_image_from_sample(sample, freq);
#save to correct folder
true in levels(wdf.kiwi) ? l="K" : l="N"
#savefig(plot, "kiwi_set-2023-09-07/$folder/$l/$folder-$(duration-4)-$duration.png")
PNGFiles.save("kiwi_set_2023-11-13/$folder/$l/$folder-$(duration-4)-$duration.png", plot)
end
end
kiwi = f.kiwi
@info (folder, duration, kiwi)
mkpath("kiwi_set_2023-11-13/$folder/K")
mkpath("kiwi_set_2023-11-13/$folder/N")
ldf = DataFrame(second = 1:duration, kiwi = false)
for clip in kiwi
clip[1] > 0 ? st = clip[1] : st = 1
clip[2] <= duration ? nd = clip[2] : nd = duration
ldf.kiwi[st:nd] .= true
end
start = 1
while start + 4 <= duration
wdf = ldf[start:start+4, :]
#make image
st, en = calculate_clip(start, start + 4, freq, length_signal)
sample = signal[Int(st):Int(en)]
plot = get_image_from_sample(sample, freq)
if true in levels(wdf.kiwi)
#save to K folder
#savefig(plot, "kiwi_set-2023-09-07/$folder/K/$folder-$start-$(start+4).png")
PNGFiles.save(
"kiwi_set_2023-11-13/$folder/K/$folder-$start-$(start+4).png",
plot,
)
start += 2
else
#save to N folder
#savefig(plot, "kiwi_set-2023-09-07/$folder/N/$folder-$start-$(start+4).png")
PNGFiles.save(
"kiwi_set_2023-11-13/$folder/N/$folder-$start-$(start+4).png",
plot,
)
start += 5
end
end
if start + 4 > duration
wdf = df[duration-4:duration, :]
#make image
st, en = calculate_clip(duration - 4, duration, freq, length_signal)
sample = signal[Int(st):Int(en)]
plot = get_image_from_sample(sample, freq)
#save to correct folder
true in levels(wdf.kiwi) ? l = "K" : l = "N"
#savefig(plot, "kiwi_set-2023-09-07/$folder/$l/$folder-$(duration-4)-$duration.png")
PNGFiles.save(
"kiwi_set_2023-11-13/$folder/$l/$folder-$(duration-4)-$duration.png",
plot,
)
end
end
* file(String),start_time,end_time,label(Int) (where start_time and end_time are in seconds from the start of the wav file)
* file(String),start_time,end_time,label(Integer) (where start_time and end_time are in seconds from the start of the wav file)
> It is better __not__ to have everything in 2 big folders: a folder holding 100_000 files on a FAT32 removable drive will rapidly grind to a standstill.
> It is better __not__ to have everything in big folders: a folder holding 100_000 files on a FAT32 removable drive will rapidly grind to a standstill.
4. Train a Resnet18 model, either pretrained on Imagenet, or preferably the pretrained Skraak Kiwi model, which is currently trained on 7_400_000 images.
4. Train a Resnet18 model, either pretrained on Imagenet, or preferably the pretrained Skraak Kiwi model, which is currently trained on 7_700_000 images.
Skraak will try to find png images first, in the folders covered by the glob pattern. If no png files are found it will predict on wav or flac files, using 5 second audio clips, converted to 224x224 pixel RGB spectrogram images, with a 2.5 second hop.
Skraak will try to find png images first, in the folders covered by the glob pattern. If no png files are found it will predict on wav or flac files, using 5 second audio clips, converted to 224x224 pixel RGB spectrogram images, with a 2.5-second hop.