Skip to content

Commit c3813ca

Browse files
committed
Finished repet.simonline method and example. The class is now fully operational!
1 parent 1833335 commit c3813ca

File tree

3 files changed

+81
-73
lines changed

3 files changed

+81
-73
lines changed

examples.ipynb

+29-11
Large diffs are not rendered by default.

images/repet_simonline.png

-523 KB
Loading

repet.m

+52-62
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
% http://zafarrafii.com
3535
% https://github.com/zafarrafii
3636
% https://www.linkedin.com/in/zafarrafii/
37-
% 01/26/21
37+
% 01/27/21
3838

3939
% Define the properties
4040
properties (Access = private, Constant = true)
@@ -713,136 +713,126 @@
713713
% Get the number of samples and channels
714714
[number_samples,number_channels] = size(audio_signal);
715715

716-
% Window length, window function, and step length for the STFT
717-
window_length = repet.windowlength(sampling_frequency);
718-
window_function = repet.windowfunction(window_length);
719-
step_length = repet.steplength(window_length);
716+
% Set the parameters for the STFT
717+
% (audio stationary around 40 ms, power of 2 for fast FFT and constant overlap-add (COLA),
718+
% periodic Hamming window for COLA, and step equal to half the window length for COLA)
719+
window_length = 2^nextpow2(0.04*sampling_frequency);
720+
window_function = hamming(window_length,'periodic');
721+
step_length = window_length/2;
720722

721-
% Number of time frames
723+
% Derive the number of time frames
722724
number_times = ceil((number_samples-window_length)/step_length+1);
723725

724-
% Buffer length in time frames
725-
buffer_length = round((repet.buffer_length*sampling_frequency-window_length)/step_length+1);
726+
% Derive the number of frequency channels
727+
number_frequencies = window_length/2+1;
726728

727-
% Initialize the buffer spectrogram
728-
buffer_spectrogram = zeros(window_length/2+1,buffer_length,number_channels);
729+
% Get the buffer length in time frames
730+
buffer_length2 = round((repet.buffer_length*sampling_frequency)/step_length);
729731

730-
% Open the wait bar
731-
wait_bar = waitbar(0,'Online REPET-SIM');
732+
% Initialize the buffer spectrogram
733+
buffer_spectrogram = zeros(number_frequencies,buffer_length2,number_channels);
732734

733735
% Loop over the time frames to compute the buffer spectrogram
734736
% (the last frame will be the frame to be processed)
735-
for time_index = 1:buffer_length-1
736-
737-
% Sample index in the signal
738-
sample_index = step_length*(time_index-1);
737+
k = 0;
738+
for j = 1:buffer_length2-1
739739

740740
% Loop over the channels
741-
for channel_index = 1:number_channels
741+
for i = 1:number_channels
742742

743743
% Compute the FT of the segment
744-
buffer_ft = fft(audio_signal(1+sample_index:window_length+sample_index,channel_index).*window_function);
744+
buffer_ft = fft(audio_signal(k+1:k+window_length,i).*window_function);
745745

746746
% Derive the spectrum of the frame
747-
buffer_spectrogram(:,time_index,channel_index) = abs(buffer_ft(1:window_length/2+1));
747+
buffer_spectrogram(:,j,i) = abs(buffer_ft(1:number_frequencies));
748748

749749
end
750750

751-
% Update the wait bar
752-
waitbar(time_index/number_times,wait_bar);
751+
% Update the index
752+
k = k+step_length;
753753

754754
end
755755

756756
% Zero-pad the audio signal at the end
757757
audio_signal = [audio_signal;zeros((number_times-1)*step_length+window_length-number_samples,number_channels)];
758758

759-
% Similarity distance in time frames
760-
similarity_distance = round(repet.similarity_distance*sampling_frequency/step_length);
759+
% Get the similarity distance in time frames
760+
similarity_distance2 = round(repet.similarity_distance*sampling_frequency/step_length);
761761

762-
% Cutoff frequency in frequency channels for the dual high-pass
763-
% filter of the foreground
764-
cutoff_frequency = ceil(repet.cutoff_frequency*(window_length-1)/sampling_frequency);
762+
% Get the cutoff frequency in frequency channels
763+
% for the dual high-pass filter of the foreground
764+
cutoff_frequency2 = ceil(repet.cutoff_frequency*window_length/sampling_frequency);
765765

766766
% Initialize the background signal
767767
background_signal = zeros((number_times-1)*step_length+window_length,number_channels);
768768

769769
% Loop over the time frames to compute the background signal
770-
for time_index = buffer_length:number_times
771-
772-
% Sample index in the signal
773-
sample_index = step_length*(time_index-1);
770+
for j = buffer_length2:number_times
774771

775-
% Time index of the current frame
776-
current_index = mod(time_index-1,buffer_length)+1;
772+
% Get the time index of the current frame
773+
j0 = mod(j-1,buffer_length2)+1;
777774

778775
% Initialize the FT of the current segment
779776
current_ft = zeros(window_length,number_channels);
780777

781778
% Loop over the channels
782-
for channel_index = 1:number_channels
779+
for i = 1:number_channels
783780

784781
% Compute the FT of the current segment
785-
current_ft(:,channel_index) ...
786-
= fft(audio_signal(1+sample_index:window_length+sample_index,channel_index).*window_function);
782+
current_ft(:,i) = fft(audio_signal(k+1:k+window_length,i).*window_function);
787783

788-
% Derive the spectrum of the current frame and update
789-
% the buffer spectrogram
790-
buffer_spectrogram(:,current_index,channel_index) ...
791-
= abs(current_ft(1:window_length/2+1,channel_index));
784+
% Derive the magnitude spectrum and update the buffer spectrogram
785+
buffer_spectrogram(:,j0,i) = abs(current_ft(1:number_frequencies,i));
792786

793787
end
794788

795-
% Cosine similarity between the spectrum of the current
796-
% frame and the past frames, for all the channels
789+
% Compute the cosine similarity between the spectrum of the current frame
790+
% and the past frames, for all the channels
797791
similarity_vector ...
798-
= repet.similaritymatrix(mean(buffer_spectrogram,3),mean(buffer_spectrogram(:,current_index,:),3));
792+
= repet.similaritymatrix(mean(buffer_spectrogram,3),mean(buffer_spectrogram(:,j0,:),3));
799793

800-
% Indices of the similar frames
794+
% Estimate the indices of the similar frames
801795
[~,similarity_indices] ...
802-
= repet.localmaxima(similarity_vector,repet.similarity_threshold,similarity_distance,repet.similarity_number);
796+
= repet.localmaxima(similarity_vector,repet.similarity_threshold,similarity_distance2,repet.similarity_number);
803797

804798
% Loop over the channels
805-
for channel_index = 1:number_channels
799+
for i = 1:number_channels
806800

807801
% Compute the repeating spectrum for the current frame
808-
repeating_spectrum = median(buffer_spectrogram(:,similarity_indices,channel_index),2);
802+
repeating_spectrum = median(buffer_spectrogram(:,similarity_indices,i),2);
809803

810804
% Refine the repeating spectrum
811-
repeating_spectrum = min(repeating_spectrum,buffer_spectrogram(:,current_index,channel_index));
805+
repeating_spectrum = min(repeating_spectrum,buffer_spectrogram(:,j0,i));
812806

813807
% Derive the repeating mask for the current frame
814-
repeating_mask = (repeating_spectrum+eps)./(buffer_spectrogram(:,current_index,channel_index)+eps);
808+
repeating_mask = (repeating_spectrum+eps)./(buffer_spectrogram(:,j0,i)+eps);
815809

816-
% High-pass filtering of the (dual) non-repeating
817-
% foreground
818-
repeating_mask(2:cutoff_frequency+1,:) = 1;
810+
% Perform a high-pass filtering of the dual foreground
811+
repeating_mask(2:cutoff_frequency2+1,:) = 1;
819812

820-
% Mirror the frequency channels
813+
% Recover the mirrored frequencies
821814
repeating_mask = cat(1,repeating_mask,repeating_mask(end-1:-1:2));
822815

823816
% Apply the mask to the FT of the current segment
824-
background_ft = repeating_mask.*current_ft(:,channel_index);
817+
background_ft = repeating_mask.*current_ft(:,i);
825818

826-
% Inverse FT of the current segment
827-
background_signal(1+sample_index:window_length+sample_index,channel_index) ...
828-
= background_signal(1+sample_index:window_length+sample_index,channel_index)+real(ifft(background_ft));
819+
% Take the inverse FT of the current segment
820+
background_signal(k+1:k+window_length,i) ...
821+
= background_signal(k+1:k+window_length,i)+real(ifft(background_ft));
829822

830823
end
831824

832-
% Update the wait bar
833-
waitbar(time_index/number_times,wait_bar);
825+
% Update the index
826+
k = k+step_length;
834827

835828
end
836829

837830
% Truncate the signal to the original length
838831
background_signal = background_signal(1:number_samples,:);
839832

840-
% Un-window the signal (just in case)
833+
% Normalize the signal by the gain introduced by the COLA (if any)
841834
background_signal = background_signal/sum(window_function(1:step_length:window_length));
842835

843-
% Close the wait bar
844-
close(wait_bar)
845-
846836
end
847837

848838
function specshow(audio_spectrogram, number_samples, sampling_frequency, xtick_step, ytick_step)

0 commit comments

Comments
 (0)