using LinearAlgebra
using Plots
using WAV
using FFTW

"""
    amplitude_spectrum(x_hat, n)

Return the single-sided amplitude spectrum of a real signal of length `n`
whose `rfft` coefficients are `x_hat`.

Every bin is scaled by `2 / n`, except the bins that have no mirrored twin in
the full spectrum, which are scaled by `1 / n`: the DC bin and, when `n` is
even, the Nyquist (last) bin.
"""
function amplitude_spectrum(x_hat, n)
    amp = 2 .* abs.(x_hat) ./ n
    isempty(amp) && return amp
    amp[1] /= 2
    if iseven(n) && length(amp) > 1
        amp[end] /= 2
    end
    return amp
end

filepath = "../../../assets/attachments/data/audio/piano-phrase.wav"

# `wavread` returns (samples, sampling rate, ...); only the first two are used.
# `fr` has one row per sample and one column per channel.
fr, frs = wavread(filepath)

tempos = (0:size(fr, 1)-1) ./ frs

WAVArray(frs, fr, "Frase de piano")

display(plot(tempos, fr[:, 1], xlabel="tempo (s)", ylabel="amplitude", label=nothing,
    title="Sinal sonoro", titlefont=10))

# Use the number of samples per channel, not `length(fr)`, which would also
# count the channels and overstate the duration of multi-channel audio.
println("Duração: $(size(fr, 1)/frs) s")

frame_duration = 100 # ms - the mp3 standard is 26 ms
frame_length = Int(div(frame_duration * frs, 1000))
fr_frame = fr[1:frame_length, 1]

WAVArray(frs, fr_frame, "Frase de piano (primeiros $(frame_duration)ms)")

tempos_frame = (0:length(fr_frame)-1) / frs
# Bin k of the rfft of an N-sample frame sits at k * frs / N Hz.
freqs_frame = (0:div(frame_length, 2)) .* (frs / frame_length) # frequencies (Hz)
fr_frame_hat = rfft(fr_frame)
Z_frame = amplitude_spectrum(fr_frame_hat, frame_length)

display(WAVArray(frs, fr_frame, "Parte da frase de piano"))

display(plot(tempos_frame, fr_frame, xlabel="tempo (s)", ylabel="amplitude",
    label=nothing, title="Sinal do canal 1", titlefont=10))

display(plot(freqs_frame, Z_frame, xlabel="frequências (Hz)", ylabel="amplitude",
    label=nothing, title="Espectro de amplitude", titlefont=10))

fr_frame_hat

Z_frame

[eachindex(Z_frame) Z_frame]

# Rows of (bin index, amplitude), ordered from the largest amplitude down.
Z_frame_top = sortslices([eachindex(Z_frame) Z_frame], dims = 1, by = x -> x[2], rev = true)

scatter(Z_frame_top[:, 2])

length_top = 10
Z_top_inds = Int.(Z_frame_top[1:length_top, 1])

# need to specify length of inverse real transform since the rfft is [d/2] long
# and the inverse could be 2[d/2] or 2[d/2] + 1, depending on whether the original
# vector has even or odd length
irfft(rfft(fr_frame), frame_length) ≈ fr_frame

irfft(fr_frame_hat, frame_length)

# Keep only the dominant coefficients and zero out all the others.
fr_frame_hat_top = zero(fr_frame_hat)
fr_frame_hat_top[Z_top_inds] .= fr_frame_hat[Z_top_inds]

fr_frame_top = irfft(fr_frame_hat_top, frame_length)

# Spectrum of the reconstructed (compressed) frame, not of the original one.
fr_frame_top_hat = rfft(fr_frame_top)
Z_frame_top_back = amplitude_spectrum(fr_frame_top_hat, frame_length)

display(WAVArray(frs, fr_frame_top, "Parte da frase de piano comprimida"))

display(plot(tempos_frame, fr_frame_top, xlabel="tempo (s)", ylabel="amplitude",
    label=nothing, title="Sinal do canal 1", titlefont=10))

display(plot(freqs_frame, Z_frame_top_back, xlabel="frequências (Hz)",
    ylabel="amplitude", label=nothing, title="Espectro de amplitude", titlefont=10))

display(WAVArray(frs, fr_frame, "Parte da frase de piano"))

display(plot(tempos_frame, fr_frame, xlabel="tempo (s)", ylabel="amplitude",
    label=nothing, title="Sinal do canal 1", titlefont=10))

display(plot(freqs_frame, Z_frame, xlabel="frequências (Hz)", ylabel="amplitude",
    label=nothing, title="Espectro de amplitude", titlefont=10))

"""
    compress_frame(fr_frame, frs, length_top = 50)

Compress one real-valued frame by keeping only its `length_top` rfft
coefficients of largest single-sided amplitude.

# Arguments
- `fr_frame`: vector with the samples of the frame.
- `frs`: sampling rate; not needed by the computation, kept so that existing
  calls remain valid.
- `length_top`: number of coefficients to keep; values larger than the number
  of rfft bins are clamped to that number.

# Returns
A named tuple `(inds, freqs)`: the kept bin indices, ordered by decreasing
amplitude, and the complex rfft coefficients at those indices.

# Throws
- `ArgumentError` if `length_top` is negative.
"""
function compress_frame(fr_frame, frs, length_top = 50)
    length_top >= 0 ||
        throw(ArgumentError("length_top must be non-negative, got $length_top"))
    frame_length = length(fr_frame)
    fr_frame_hat = rfft(fr_frame)
    Z_frame = amplitude_spectrum(fr_frame_hat, frame_length)
    # Never ask for more coefficients than the transform has.
    ntop = min(length_top, length(Z_frame))
    # `sortperm` is stable, so equal amplitudes keep ascending bin order.
    Z_top_inds = sortperm(Z_frame; rev = true)[1:ntop]
    return (
        inds = Z_top_inds,
        freqs = fr_frame_hat[Z_top_inds],
    )
end

"""
    uncompress_frame(inds, freqs, frame_length)

Rebuild a real frame of `frame_length` samples from the rfft coefficients
`freqs` located at the bin indices `inds`; every other bin is taken as zero.

The frame length must be given explicitly because an rfft with
`div(frame_length, 2) + 1` bins may come from an even or an odd length signal.
"""
function uncompress_frame(inds, freqs, frame_length)
    fr_frame_hat_top = zeros(ComplexF64, div(frame_length, 2) + 1)
    fr_frame_hat_top[inds] .= freqs
    fr_frame_back = irfft(fr_frame_hat_top, frame_length)
    return fr_frame_back
end

comp = compress_frame(fr_frame, frs)

fr_frame_bak = uncompress_frame(comp.inds, comp.freqs, frame_length)

display(WAVArray(frs, fr_frame_bak, "compressed audio"))

frame_duration = 26 # ms - the mp3 standard is 26 ms
frame_length = Int(div(frame_duration * frs, 1000))

# Count frames from the number of samples per channel; `length(fr)` would also
# count the channels and run past the end of multi-channel audio.
# Samples after the last complete frame are left out of the output.
nframes = div(size(fr, 1), frame_length)
uncomp = Array{Float64, 2}(undef, nframes * frame_length, 1)

for n in 1:nframes
    nstart = (n - 1) * frame_length + 1
    nend = n * frame_length
    lfr_frame = fr[nstart:nend, 1]
    comp_frame = compress_frame(lfr_frame, frs, 50)
    uncomp_frame = uncompress_frame(comp_frame.inds, comp_frame.freqs, length(lfr_frame))
    uncomp[nstart:nend, 1] .= uncomp_frame
end

display(WAVArray(frs, uncomp, "compressed audio"))

display(WAVArray(frs, fr, "Frase de piano"))