audio_effect_pitch_shift.cpp source code [Godot/servers/audio/effects/audio_effect_pitch_shift.cpp]

/**************************************************************************/
/*  audio_effect_pitch_shift.cpp                                          */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/
30
31	#include "audio_effect_pitch_shift.h"
32
33	#include "core/math/math_funcs.h"
34	#include "servers/audio_server.h"
35
/* Thirdparty code, so disable clang-format with Godot style */
/* clang-format off */
38
/****************************************************************************
*
* NAME: smbPitchShift.cpp
* VERSION: 1.2
* HOME URL: https://blogs.zynaptiq.com/bernsee
* KNOWN BUGS: none
*
* SYNOPSIS: Routine for doing pitch shifting while maintaining
* duration using the Short Time Fourier Transform.
*
* DESCRIPTION: The routine takes a pitchShift factor value which is between 0.5
* (one octave down) and 2. (one octave up). A value of exactly 1 does not change
* the pitch. numSampsToProcess tells the routine how many samples in indata[0...
* numSampsToProcess-1] should be pitch shifted and moved to outdata[0 ...
* numSampsToProcess-1]. The two buffers can be identical (ie. it can process the
* data in-place). fftFrameSize defines the FFT frame size used for the
* processing. Typical values are 1024, 2048 and 4096. It may be any value <=
* MAX_FRAME_LENGTH but it MUST be a power of 2. osamp is the STFT
* oversampling factor which also determines the overlap between adjacent STFT
* frames. It should at least be 4 for moderate scaling ratios. A value of 32 is
* recommended for best quality. sampleRate takes the sample rate for the signal
* in unit Hz, ie. 44100 for 44.1 kHz audio. The data passed to the routine in
* indata[] should be in the range [-1.0, 1.0), which is also the output range
* for the data, make sure you scale the data accordingly (for 16bit signed integers
* you would have to divide (and multiply) by 32768).
*
* COPYRIGHT 1999-2015 Stephan M. Bernsee <s.bernsee [AT] zynaptiq [DOT] com>
*
* The Wide Open License (WOL)
*
* Permission to use, copy, modify, distribute and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice and this license appear in all source copies.
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF
* ANY KIND. See https://dspguru.com/wide-open-license/ for more information.
*
*****************************************************************************/
76
77	void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long fftFrameSize, long osamp, float sampleRate, float indata, float* outdata,int* stride) {
78
79
80	/*
81	Routine smbPitchShift(). See top of file for explanation
82	Purpose: doing pitch shifting while maintaining duration using the Short
83	Time Fourier Transform.
84	Author: (c)1999-2015 Stephan M. Bernsee <s.bernsee [AT] zynaptiq [DOT] com>
85	*/
86
87	double magn, phase, tmp, window, real, imag;
88	double freqPerBin, expct;
89	long i,k, qpd, index, inFifoLatency, stepSize, fftFrameSize2;
90
91	/ set up some handy variables /
92	fftFrameSize2 = fftFrameSize/`2`;
93	stepSize = fftFrameSize/osamp;
94	freqPerBin = sampleRate/(double)fftFrameSize;
95	expct = `2.`Math_PI(double)stepSize/(double)fftFrameSize;
96	inFifoLatency = fftFrameSize-stepSize;
97	if (gRover == `0`) { gRover = inFifoLatency;
98	}
99
100	/ initialize our static arrays /
101
102	/ main processing loop /
103	for (i = `0`; i < numSampsToProcess; i++){
104	/ As long as we have not yet collected enough data just read in /
105	gInFIFO[gRover] = indata[i*stride];
106	outdata[i*stride] = gOutFIFO[gRover-inFifoLatency];
107	gRover++;
108
109	/ now we have enough data for processing /
110	if (gRover >= fftFrameSize) {
111	gRover = inFifoLatency;
112
113	/ do windowing and re,im interleave /
114	for (k = `0`; k < fftFrameSize;k++) {
115	window = -`.5`cos(`2.`Math_PI(double)k/(double*)fftFrameSize)+`.5`;
116	gFFTworksp[`2`k] = gInFIFO[k] window;
117	gFFTworksp[`2`*k+`1`] = `0.`;
118	}
119
120
121	/ *************** ANALYSIS ***************** /
122	/ do transform /
123	smbFft(gFFTworksp, fftFrameSize, -`1`);
124
125	/ this is the analysis step /
126	for (k = `0`; k <= fftFrameSize2; k++) {
127	/ de-interlace FFT buffer /
128	real = gFFTworksp[`2`*k];
129	imag = gFFTworksp[`2`*k+`1`];
130
131	/ compute magnitude and phase /
132	magn = `2.`sqrt(realreal + imag*imag);
133	phase = atan2(imag,real);
134
135	/ compute phase difference /
136	tmp = phase - gLastPhase[k];
137	gLastPhase[k] = phase;
138
139	/ subtract expected phase difference /
140	tmp -= (double)k*expct;
141
142	/ map delta phase into +/- Pi interval /
143	qpd = tmp/Math_PI;
144	if (qpd >= `0`) { qpd += qpd&`1`;
145	} else { qpd -= qpd&`1`;
146	}
147	tmp -= Math_PI(double*)qpd;
148
149	/ get deviation from bin frequency from the +/- Pi interval /
150	tmp = osamptmp/(`2.`Math_PI);
151
152	/ compute the k-th partials' true frequency /
153	tmp = (double)kfreqPerBin + tmpfreqPerBin;
154
155	/ store magnitude and true frequency in analysis arrays /
156	gAnaMagn[k] = magn;
157	gAnaFreq[k] = tmp;
158
159	}
160
161	/ *************** PROCESSING ***************** /
162	/ this does the actual pitch shifting /
163	memset(gSynMagn, `0`, fftFrameSize*sizeof(float));
164	memset(gSynFreq, `0`, fftFrameSize*sizeof(float));
165	for (k = `0`; k <= fftFrameSize2; k++) {
166	index = k*pitchShift;
167	if (index <= fftFrameSize2) {
168	gSynMagn[index] += gAnaMagn[k];
169	gSynFreq[index] = gAnaFreq[k] * pitchShift;
170	}
171	}
172
173	/ *************** SYNTHESIS ***************** /
174	/ this is the synthesis step /
175	for (k = `0`; k <= fftFrameSize2; k++) {
176	/ get magnitude and true frequency from synthesis arrays /
177	magn = gSynMagn[k];
178	tmp = gSynFreq[k];
179
180	/ subtract bin mid frequency /
181	tmp -= (double)k*freqPerBin;
182
183	/ get bin deviation from freq deviation /
184	tmp /= freqPerBin;
185
186	/ take osamp into account /
187	tmp = `2.`Math_PItmp/osamp;
188
189	/ add the overlap phase advance back in /
190	tmp += (double)k*expct;
191
192	/ accumulate delta phase to get bin phase /
193	gSumPhase[k] += tmp;
194	phase = gSumPhase[k];
195
196	/ get real and imag part and re-interleave /
197	gFFTworksp[`2`k] = magncos(phase);
198	gFFTworksp[`2`k+`1`] = magnsin(phase);
199	}
200
201	/ zero negative frequencies /
202	for (k = fftFrameSize+`2`; k < `2`*fftFrameSize; k++) { gFFTworksp[k] = `0.`;
203	}
204
205	/ do inverse transform /
206	smbFft(gFFTworksp, fftFrameSize, `1`);
207
208	/ do windowing and add to output accumulator /
209	for(k=`0`; k < fftFrameSize; k++) {
210	window = -`.5`cos(`2.`Math_PI(double)k/(double*)fftFrameSize)+`.5`;
211	gOutputAccum[k] += `2.`windowgFFTworksp[`2`k]/(fftFrameSize2osamp);
212	}
213	for (k = `0`; k < stepSize; k++) { gOutFIFO[k] = gOutputAccum[k];
214	}
215
216	/ shift accumulator /
217	memmove(gOutputAccum, gOutputAccum+stepSize, fftFrameSize*sizeof(float));
218
219	/ move input FIFO /
220	for (k = `0`; k < inFifoLatency; k++) { gInFIFO[k] = gInFIFO[k+stepSize];
221	}
222	}
223	}
224	}
225
226
227
228	void SMBPitchShift::smbFft(float fftBuffer, long* fftFrameSize, long sign)
229	/*
230	FFT routine, (C)1996 S.M.Bernsee. Sign = -1 is FFT, 1 is iFFT (inverse)
231	Fills fftBuffer[0...2fftFrameSize-1] with the Fourier transform of the*
232	time domain data in fftBuffer[0...2fftFrameSize-1]. The FFT array takes*
233	and returns the cosine and sine parts in an interleaved manner, ie.
234	fftBuffer[0] = cosPart[0], fftBuffer[1] = sinPart[0], asf. fftFrameSize
235	must be a power of 2. It expects a complex input signal (see footnote 2),
236	ie. when working with 'common' audio signals our input signal has to be
237	passed as {in[0],0.,in[1],0.,in[2],0.,...} asf. In that case, the transform
238	of the frequencies of interest is in fftBuffer[0...fftFrameSize].
239	*/
240	{
241	float wr, wi, arg, p1, p2, temp;
242	float tr, ti, ur, ui, p1r, p1i, p2r, p2i;
243	long i, bitm, j, le, le2, k;
244
245	for (i = `2`; i < `2`*fftFrameSize-`2`; i += `2`) {
246	for (bitm = `2`, j = `0`; bitm < `2`*fftFrameSize; bitm <<= `1`) {
247	if (i & bitm) { j++;
248	}
249	j <<= `1`;
250	}
251	if (i < j) {
252	p1 = fftBuffer+i; p2 = fftBuffer+j;
253	temp = p1; (p1++) = *p2;
254	(p2++) = temp; temp = p1;
255	p1 = p2; *p2 = temp;
256	}
257	}
258	for (k = `0`, le = `2`; k < (long)(log((double)fftFrameSize)/log(`2.`)+`.5`); k++) {
259	le <<= `1`;
260	le2 = le>>`1`;
261	ur = `1.0`;
262	ui = `0.0`;
263	arg = Math_PI / (le2>>`1`);
264	wr = cos(arg);
265	wi = sign*sin(arg);
266	for (j = `0`; j < le2; j += `2`) {
267	p1r = fftBuffer+j; p1i = p1r+`1`;
268	p2r = p1r+le2; p2i = p2r+`1`;
269	for (i = j; i < `2`*fftFrameSize; i += le) {
270	tr = p2r ur - p2i ui;
271	ti = p2r ui + p2i ur;
272	p2r = p1r - tr; p2i = p1i - ti;
273	p1r += tr; p1i += ti;
274	p1r += le; p1i += le;
275	p2r += le; p2i += le;
276	}
277	tr = urwr - uiwi;
278	ui = urwi + uiwr;
279	ur = tr;
280	}
281	}
282	}
283
284
/* Godot code again */
/* clang-format on */
287
288	void AudioEffectPitchShiftInstance::process(const AudioFrame p_src_frames, AudioFrame p_dst_frames, int p_frame_count) {
289	float sample_rate = AudioServer::get_singleton()->get_mix_rate();
290
291	float in_l = (float* *)p_src_frames;
292	float *in_r = in_l + `1`;
293
294	float out_l = (float* *)p_dst_frames;
295	float *out_r = out_l + `1`;
296
297	shift_l.PitchShift(base ->pitch_scale, p_frame_count, fft_size, base ->oversampling, sample_rate, in_l, out_l, `2`);
298	shift_r.PitchShift(base ->pitch_scale, p_frame_count, fft_size, base ->oversampling, sample_rate, in_r, out_r, `2`);
299	}
300
301	Ref<AudioEffectInstance> AudioEffectPitchShift::instantiate() {
302	Ref<AudioEffectPitchShiftInstance> ins;
303	ins.instantiate();
304	ins ->base = Ref<AudioEffectPitchShift>(this);
305	static const int fft_sizes[FFT_SIZE_MAX] = { `256`, `512`, `1024`, `2048`, `4096` };
306	ins ->fft_size = fft_sizes[fft_size];
307
308	return ins;
309	}
310
311	void AudioEffectPitchShift::set_pitch_scale(float p_pitch_scale) {
312	ERR_FAIL_COND(!(p_pitch_scale > `0.0`));
313	pitch_scale = p_pitch_scale;
314	}
315
316	float AudioEffectPitchShift::get_pitch_scale() const {
317	return pitch_scale;
318	}
319
320	void AudioEffectPitchShift::set_oversampling(int p_oversampling) {
321	ERR_FAIL_COND(p_oversampling < `4`);
322	oversampling = p_oversampling;
323	}
324
325	int AudioEffectPitchShift::get_oversampling() const {
326	return oversampling;
327	}
328
329	void AudioEffectPitchShift::set_fft_size(FFTSize p_fft_size) {
330	ERR_FAIL_INDEX(p_fft_size, FFT_SIZE_MAX);
331	fft_size = p_fft_size;
332	}
333
334	AudioEffectPitchShift::FFTSize AudioEffectPitchShift::get_fft_size() const {
335	return fft_size;
336	}
337
338	void AudioEffectPitchShift::_bind_methods() {
339	ClassDB::bind_method(D_METHOD("set_pitch_scale", "rate"), &AudioEffectPitchShift::set_pitch_scale);
340	ClassDB::bind_method(D_METHOD("get_pitch_scale"), &AudioEffectPitchShift::get_pitch_scale);
341
342	ClassDB::bind_method(D_METHOD("set_oversampling", "amount"), &AudioEffectPitchShift::set_oversampling);
343	ClassDB::bind_method(D_METHOD("get_oversampling"), &AudioEffectPitchShift::get_oversampling);
344
345	ClassDB::bind_method(D_METHOD("set_fft_size", "size"), &AudioEffectPitchShift::set_fft_size);
346	ClassDB::bind_method(D_METHOD("get_fft_size"), &AudioEffectPitchShift::get_fft_size);
347
348	ADD_PROPERTY(PropertyInfo (Variant::FLOAT, "pitch_scale", PROPERTY_HINT_RANGE, "0.01,16,0.01"), "set_pitch_scale", "get_pitch_scale");
349	ADD_PROPERTY(PropertyInfo (Variant::FLOAT, "oversampling", PROPERTY_HINT_RANGE, "4,32,1"), "set_oversampling", "get_oversampling");
350	ADD_PROPERTY(PropertyInfo (Variant::INT, "fft_size", PROPERTY_HINT_ENUM, "256,512,1024,2048,4096"), "set_fft_size", "get_fft_size");
351
352	BIND_ENUM_CONSTANT(FFT_SIZE_256);
353	BIND_ENUM_CONSTANT(FFT_SIZE_512);
354	BIND_ENUM_CONSTANT(FFT_SIZE_1024);
355	BIND_ENUM_CONSTANT(FFT_SIZE_2048);
356	BIND_ENUM_CONSTANT(FFT_SIZE_4096);
357	BIND_ENUM_CONSTANT(FFT_SIZE_MAX);
358	}
359
360	AudioEffectPitchShift::AudioEffectPitchShift() {
361	pitch_scale = `1.0`;
362	oversampling = `4`;
363	fft_size = FFT_SIZE_2048;
364	wet = `0.0`;
365	dry = `0.0`;
366	filter = false;
367	}
368

Browse the source code of Godot/servers/audio/effects/audio_effect_pitch_shift.cpp