Archive

Archive for June, 2014

Audio Time Scale Modification – Phase Vocoder Implementation in Matlab

June 2, 2014 Leave a comment
function synthSignal = pl_phaseVocoder_variable_analysis_hop(signal, tsm_factor, winSamps)
% A phase locked vocoder time-scale modification algorithm based upon Jean
% Laroche's 1999 work. The synthesis hop is fixed at one quarter the analysis window (hanning) while the analysis hop is scaled by the time scale factor; this results in more FFT computations than the bonada 2000 approach for slowing down - but provides smoother transitions frame to frame of the magnitude spectrums and, in general, a better quality of output. This code started
% out as the code provided by Tae Hong Park, but has changed significantly
% over the years
%
% David Dorran, Audio Research Group, Dublin Institute of Technology
% david.dorran@dit.ie
% http://eleceng.dit.ie/dorran
% http://eleceng.dit.ie/arg
%

% make sure input is mono and transpose if necessary
[r, c] = size(signal);
if r > 1
    signal = signal';
end;
[r, c] = size(signal);
if r > 1
    disp('Note :only works on mono signals');
    synthSignal = [];
    return
end;

% add a few zeros to stop the algorithm failing
zpad = zeros(1, 44100/4);
signal = [signal, zpad];
if nargin < 3
    winSamps = 2048;
end

winSampsPow2 = winSamps;
synHopSamps = winSampsPow2/4;
anHopSamps = round(synHopSamps/tsm_factor);

win = hanning(winSampsPow2);

X = specgram(signal, winSampsPow2, 100,win, winSampsPow2 - anHopSamps);

moduli = abs(X);
phases = angle(X);

[numBins, numFrames ] = size(phases);

syn_phases = zeros(numBins, numFrames); % a holder for synthesis phases

twoPi   = 2*pi;
omega   = twoPi * anHopSamps * [0:numBins-1]'/numBins/2; %the expected phase hop per frame

syn_phases(:,1) = phases(:,1) .* ( synHopSamps/ anHopSamps);

for idx =  2: numFrames
    ddx = idx - 1;
    deltaPhi = princarg(phases(:,idx) - phases(:,ddx) -omega); %calculate priciple determination of the hetrodyned phase increment
    phaseInc = (omega+deltaPhi)/anHopSamps; % phase increment per sample
    %locate the peaks
    pk_indices = [];
    pk_indices  =  locate_peaks(moduli(:,idx));
    if(~length(pk_indices))
        pk_indices = [1 10 12]; % just in case an odd situation is encountered  e.g. a sequence of zeros
    end
    %update phase of each peak channel using the phase propagation formula
    syn_phases(pk_indices,idx)    = syn_phases(pk_indices,ddx)+synHopSamps*phaseInc(pk_indices); %synthesis phase
    %update phase of channels in region of influence
    % first calculate angle of rotation
    rotation_angles = syn_phases(pk_indices,idx) - phases(pk_indices,idx);
    start_point = 1; %initialize the starting point of the region of influence

    for k = 1: length(pk_indices) -1
        peak = pk_indices(k);
        angle_rotation  = rotation_angles(k);
        next_peak = pk_indices(k+1);
        end_point = round((peak + next_peak)/2);
        ri_indices = [start_point : peak-1, (peak+1) : end_point]; %indices of the region of influence
        syn_phases(ri_indices,idx) = angle_rotation + phases(ri_indices, idx);
        start_point = end_point + 1;
    end;
end;

%Make sure that the LHS and RHS of the DFT's of the synthesis frames are a
%complex conjuget of each other
Z = moduli.*exp(i*syn_phases);
Z = Z(1:(numBins),:);
conj_Z = conj(flipud(Z(2:size(Z,1) -1,:)));
Z = [Z;conj_Z];

synthSignal = zeros(round(length(signal)*tsm_factor+length(win)), 1);

curStart = 1;
for idx = 1:numFrames-1
    curEnd   = curStart + length(win) - 1;
    rIFFT    = real(ifft(Z(:,idx)));
    synthSignal([curStart:curEnd]) = synthSignal([curStart:curEnd]) + rIFFT.*win;
    curStart = curStart + synHopSamps;
end

%--------------------------------------------------------------------------
function indices = locate_peaks(ip)
%function to find peaks
%a peak is any sample which is greater than its two nearest neighbours
	index = 1;
	num = 2;
    indices = [];
	for k = 1 + num : length(ip) - num
		seg = ip(k-num:k+num);
		[val, max_index] = max(seg);
		if max_index == num + 1
			indices(index) = k;
			index = index + 1;
		end;
	end;
    
%--------------------------------------------------------------------------
function Phase = princarg(Phasein)
    two_pi = 2*pi;
    a = Phasein/two_pi;
    k = round(a);
    Phase = Phasein-k*two_pi;


Audio time-scale modification – VSOLA algorithm in matlab

June 2, 2014 Leave a comment
function op = vsola(ip, tsm_factor, P)
% Implementation of the variable parameter  synchronised overlap add VSOLA algorithm 
% This implementation makes use of the standard SOLA algorithm (Rocus and Wilgus, ICASSP 1986) and some efficient paramter settings for the SOLA algorithm (Dorran, Lawlor and Coyle, ICASSP 2003) and (Dorran, Lawlor and Coyle, DAFX 2003)
% Given an input, ip, the longest likely pitch period, P, and  and a time scale modification factor, tsm_factor, return an output, op, that is a time scaled version of the input. The synthesis_overlap_size is the length, in samples of the lowest likely fundametal period of the input.
% for speech synthesis_overlap_size is set to (16/1000)*fs samples and for music synthesis_overlap_size is typically set to (20/1000)*fs samples 
%
% David Dorran, Audio Research Group, Dublin Institute of Technology
% david.dorran@dit.ie
% http://eleceng.dit.ie/dorran
% http://eleceng.dit.ie/arg
%

% make sure input is mono and transpose if necessary
[r, c] = size(ip);
if r > 1
    ip = ip';
end;    
[r, c] = size(ip);
if r > 1
    disp('Note :only works on mono signals');
    op = [];
    return
end;

% initialize the values of analysis_frame_length, analysis_window_offset, synthesis_frame_offset and length_of_overlap
desired_tsm_len = round(length(ip)*tsm_factor);
P = round(P); %longest likely pitch period in samples
Lmax = round(P * 1.5);% found this to a reasonable value for the Lmax- Lmax is the duration over which the correlation function is applied
stationary_length = (P * 1.5); % found this to a reasonable value for the stationary length - This is the max duration that could be discarded/repeated

analysis_window_offset = round((stationary_length - P)/abs(tsm_factor - 1)); % this equation was derived.
synthesis_window_offset = round(analysis_window_offset * tsm_factor);
analysis_frame_length = round(Lmax + synthesis_window_offset);
number_of_analysis_frames = floor((length(ip)- analysis_frame_length)/analysis_window_offset);

if number_of_analysis_frames < 2 %not much time-scaling being done just return the input
    op = ip;
    return;
end;

%the next two lines just ensure that the last frame finishes at the very end of the signal (not essential)
zpad = zeros(1, (number_of_analysis_frames*analysis_window_offset) + analysis_frame_length - length(ip));
ip = [ip zpad];

%initialize the output
op = zeros(1, desired_tsm_len);
%initialize the first output frame
op(1 : analysis_frame_length) = ip(1 : analysis_frame_length);

min_overlap = round(Lmax - P); %ensure that there is some minimum overlap
count = 0;

% Loop for the 2nd analysis frame to the number_of_analysis_frames
for m = 1 : number_of_analysis_frames
    
    %grab the mth input frame
    ip_frame = ip(analysis_window_offset * m : (analysis_window_offset * m) + analysis_frame_length - 1);
    
    %grab the maximum overlapping segments from the inout frame and the current output
    seg_1 = op(round(synthesis_window_offset*(m-1))+analysis_frame_length - Lmax : round(synthesis_window_offset*(m-1))+analysis_frame_length -1);
    seg_2 = ip_frame(1: Lmax);
    
    %compute the correlation of these segments
    correlation   = xcorr(seg_2, seg_1,'coeff');

    %Find the best point to overlap (opt_overlap_length) making sure not to exceed the maximum or go below the minimum overlap.
    correlation(length(correlation) - Lmax -1: length(correlation)) = -100;
    correlation(1: min_overlap) = -100;    
    [max_correlation, opt_overlap_length] = max(correlation);
    
    if(max_correlation == 0)
        opt_overlap_length = Lmax;
    end;
%     offset = Lmax - opt_overlap_length;
%     if ((offset + analysis_window_offset -  synthesis_window_offset) >= 0 & (offset + analysis_window_offset -  synthesis_window_offset) <= P)
%         count = count +1;
%     end;
    
    % append mth analysis frame to the current synthesised output using a linear cross fade
    ov_seg = linear_cross_fade(seg_1, ip_frame, opt_overlap_length);
    ov_len =(round(synthesis_window_offset*m)+analysis_frame_length) - (round(synthesis_window_offset*(m-1))+analysis_frame_length - Lmax) + 1;
    ov_seg = ov_seg(1:ov_len);
    op(round(synthesis_window_offset*(m-1))+analysis_frame_length - Lmax: round(synthesis_window_offset*m)+analysis_frame_length) = ov_seg;
    
end; % end of for loop

% linear cross fade the first segment with the second segment given a certain amount of overlap
% |----------seg1----------|
%			   |---------------seg2-----------------------|
%              |--overlap-|

function op = linear_cross_fade(seg_1, seg_2, overlap_length, cross_fade_duration)

    error(nargchk(3,4,nargin));
    if nargin < 4
        cross_fade_duration = overlap_length;
    end
    if cross_fade_duration > overlap_length
        cross_fade_duration = overlap_length;
    end
	% overlap the end of seg_1 with the start of seg_2 using a linear cross-fade
	if (length(seg_1) < overlap_length),
        seg_2 = seg_2(overlap_length - length(seg_1) + 1: length(seg_2));
		overlap_length = length(seg_1);
	end; % end of if statement

	if (length(seg_2) < overlap_length),
        seg_1 = seg_1(length(seg_1) - (overlap_length - overlap_length): length(seg_1));
		overlap_length = length(seg_2);
	end; % end of if statement

    
	overlapping_region = zeros(1, cross_fade_duration); % initialize the overlapping region
    seg_1 = seg_1(1: length(seg_1) - (overlap_length - cross_fade_duration));
    
	op = zeros(1, length(seg_1) + length(seg_2) - cross_fade_duration);

	if(overlap_length ~= 1)
		linear_ramp1 = (cross_fade_duration-1:-1:0) ./(cross_fade_duration-1);
		linear_ramp2 = (0:1:cross_fade_duration-1) ./(cross_fade_duration-1);
		overlapping_region = (seg_1(length(seg_1)-cross_fade_duration+1:length(seg_1)).*linear_ramp1) + (seg_2(1: cross_fade_duration).*linear_ramp2);
	end;
	op = [seg_1(1:length(seg_1)-cross_fade_duration) ,overlapping_region , seg_2(cross_fade_duration+1:length(seg_2))];
% END of linear_cross_fade function