Adafruit_Learning_System_Gu.../MonsterMaskVoiceChanger/pdmvoice.ino
2022-02-23 13:44:48 -05:00

392 lines
18 KiB
C++

// SPDX-FileCopyrightText: 2019 Phillip Burgess for Adafruit Industries
//
// SPDX-License-Identifier: MIT
// Basic voice changer code. This version is specific to the Adafruit
// MONSTER M4SK board using a PDM microphone.
#include <SPI.h>
// Voice pitch bounds, in Hz. MIN_PITCH_HZ sizes the recording buffer and
// TYP_PITCH_HZ sets the seam-jump distance (see recBufSize and jump below).
// NOTE(review): MAX_PITCH_HZ is not referenced in the code visible here.
#define MIN_PITCH_HZ 65
#define MAX_PITCH_HZ 1600
#define TYP_PITCH_HZ 175
// Playback timer stuff - use TC3 on MONSTER M4SK (no TC4 on this board)
// NOTE(review): TIMER_GCM_ID is not referenced in the code visible here.
#define TIMER TC3
#define TIMER_IRQN TC3_IRQn
#define TIMER_IRQ_HANDLER TC3_Handler
#define TIMER_GCLK_ID TC3_GCLK_ID
#define TIMER_GCM_ID GCM_TC2_TC3
// PDM mic allows 1.0 to 3.25 MHz max clock (2.4 typical).
// SPI native max is 24 MHz, so available speeds are 12, 6, 3 MHz.
#define SPI_BITRATE 3000000
// The mic is read as an SPI stream, least-significant bit first, mode 0.
static SPISettings settings(SPI_BITRATE, LSBFIRST, SPI_MODE0);
// 3 MHz / 32 bits = 93,750 Hz interrupt frequency
// 2 interrupts/sample = 46,875 Hz audio sample rate
// (i.e. 64 PDM bits are combined into each audio sample).
const float sampleRate = (float)SPI_BITRATE / 64.0;
// sampleRate is float in case factors change to make it not divide evenly.
// It DOES NOT CHANGE over time, only playbackRate does.
// Although SPI lib now has an option to get an SPI object's SERCOM number
// at run time, the interrupt handler MUST be declared at compile time...
// so it's necessary to know the SERCOM # ahead of time anyway, oh well.
// NOTE(review): PDM_SERCOM itself is not referenced in the code visible
// here; voiceSetup() looks the SERCOM up at run time via sercomList[].
#define PDM_SERCOM SERCOM3 // PDM mic SPI SERCOM on MONSTER M4SK
#define PDM_SPI SPI2 // PDM mic SPI peripheral
#define PDM_SERCOM_HANDLER SERCOM3_0_Handler
#define PDM_SERCOM_IRQn SERCOM3_0_IRQn // _0_IRQn is DRE interrupt
// SERCOM peripheral behind PDM_SPI; assigned in voiceSetup() from
// sercomList[] using the index reported by PDM_SPI.getSercomIndex().
static Sercom *sercom;
// SPI data register of that peripheral (from PDM_SPI.getDataRegister());
// the PDM interrupt handler both writes and reads through this pointer.
static volatile uint32_t *dataReg;
// Index -> SERCOM peripheral lookup. SERCOM0-3 are always listed; SERCOM4-7
// are included only when the device headers define them.
Sercom * const sercomList[] = {
SERCOM0, SERCOM1, SERCOM2, SERCOM3,
#if defined(SERCOM4)
SERCOM4,
#endif
#if defined(SERCOM5)
SERCOM5,
#endif
#if defined(SERCOM6)
SERCOM6,
#endif
#if defined(SERCOM7)
SERCOM7,
#endif
};
// Playback sample rate in Hz. The playback timer interrupt compares this
// against sampleRate to decide whether playback is sped up or slowed down.
static float playbackRate = sampleRate;
// Circular buffer of 16-bit audio samples, written by the PDM interrupt and
// read by the playback timer interrupt. Allocated in voiceSetup().
static uint16_t *recBuf = NULL;
// recBuf currently gets allocated (in voiceSetup()) for two full cycles of
// the lowest pitch we're likely to encounter. Right now it doesn't really
// NEED to be this size, but if pitch detection is added in the future then
// this'll become more useful.
// 46,875 sampling rate from mic, 65 Hz lowest pitch -> 1442 samples
// (2884 bytes).
static const uint16_t recBufSize = (uint16_t)(sampleRate / (float)MIN_PITCH_HZ * 2.0 + 0.5);
static int16_t recIndex = 0; // recBuf slot most recently written
static int16_t playbackIndex = 0; // recBuf slot most recently played
// Most recent processed mic sample (same value as stored in recBuf);
// exposed for polling by outside code.
volatile uint16_t voiceLastReading = 0;
#define DC_PERIOD 4096 // Recalculate DC offset this many samplings
// DC_PERIOD does NOT need to be a power of 2, but might save a few cycles.
// PDM rate is 46875, so 4096 = 11.44 times/sec
static uint16_t dcCounter = 0; // Rolls over every DC_PERIOD samples
static uint32_t dcSum = 0; // Accumulates DC_PERIOD samples
static uint16_t dcOffsetPrior = 32768; // DC offset interpolates linearly
static uint16_t dcOffsetNext = 32768; // between these two values
// Mic gain in 8.8 fixed point (the PDM handler multiplies by this and then
// divides by 256); set through voiceGain().
static uint16_t micGain = 256; // 1:1
// Just playing back directly from the recording circular buffer produces
// audible clicks as the waveforms rarely align at the beginning and end of
// the buffer. So what we do is advance or push back the playback index a
// certain amount when it's likely to overtake or underflow the recording
// index, and interpolate from the current to the jumped-forward-or-back
// readings over a short period. In a perfect world, that "certain amount"
// would be one wavelength of the current voice pitch...BUT...with no pitch
// detection currently, we instead use a fixed middle-of-the-road value:
// TYP_PITCH_HZ, 175 by default, which is a bit below typical female spoken
// vocal range and a bit above typical male spoken range. This all goes out
// the window with singing, and of course young people will have a higher
// speech range, so this is just a crude catch-all approximation.
// With the defaults above, jump works out to 268 samples.
static const uint16_t jump = (int)(sampleRate / (float)TYP_PITCH_HZ + 0.5);
// NOTE(review): interp is not referenced in the code visible here; the
// playback interrupt blends over 'jump' samples instead.
static const uint16_t interp = jump / 4; // Interp time = 1/4 waveform
static bool jumping = false; // True while a seam crossfade is in progress
static uint16_t jumpCount = 1; // Crossfade step counter, 1 to jump-1
// Index distance at or below which a jump is triggered; set by voicePitch().
static int16_t jumpThreshold;
// Second read position in recBuf, faded in during a crossfade.
static int16_t playbackIndexJumped;
// Next 12-bit value for the analog outputs; 2048 is mid-scale.
static uint16_t nextOut = 2048;
// Forward declaration; voiceSetup() calls this before its definition.
float voicePitch(float p);
// START PITCH SHIFT (no arguments) ----------------------------------------
// Allocates the recording buffer, starts the free-running PDM microphone
// SPI transfer with its data-register-empty interrupt, then configures and
// starts the playback timer at normal pitch (1.0).
// Returns true on success, or false if the buffer could not be allocated
// (in which case no peripheral has been touched).
bool voiceSetup(void) {
  // Allocate circular buffer for audio. An existing buffer is reused so a
  // repeated call does not leak the earlier allocation.
  if(NULL == recBuf) {
    recBuf = (uint16_t *)malloc(recBufSize * sizeof(uint16_t));
    if(NULL == recBuf) {
      return false; // Fail
    }
    // The playback interrupt starts reading before the mic has filled the
    // buffer; pre-fill with mid-scale (silence) rather than playing back
    // whatever malloc() happened to hand us.
    for(uint16_t i = 0; i < recBufSize; i++) {
      recBuf[i] = 32768;
    }
  }

  // Set up PDM microphone input -------------------------------------------

  PDM_SPI.begin();
  PDM_SPI.beginTransaction(settings); // this SPI transaction is left open
  sercom  = sercomList[PDM_SPI.getSercomIndex()];
  dataReg = PDM_SPI.getDataRegister();

  // Enabling 32-bit SPI must be done AFTER SPI.begin() which
  // resets registers. But SPI.CTRLC (where 32-bit mode is set) is
  // enable-protected, so peripheral must be disabled temporarily...
  sercom->SPI.CTRLA.bit.ENABLE = 0;       // Disable SPI
  while(sercom->SPI.SYNCBUSY.bit.ENABLE); // Wait for disable
  sercom->SPI.CTRLC.bit.DATA32B = 1;      // Enable 32-bit mode
  sercom->SPI.CTRLA.bit.ENABLE = 1;       // Re-enable SPI
  while(sercom->SPI.SYNCBUSY.bit.ENABLE); // Wait for enable
  // 4-byte word length is implicit in 32-bit mode,
  // no need to set up LENGTH register.

  sercom->SPI.INTENSET.bit.DRE = 1; // Data-register-empty interrupt
  NVIC_DisableIRQ(PDM_SERCOM_IRQn);
  NVIC_ClearPendingIRQ(PDM_SERCOM_IRQn);
  NVIC_SetPriority(PDM_SERCOM_IRQn, 0); // Top priority
  NVIC_EnableIRQ(PDM_SERCOM_IRQn);

  sercom->SPI.DATA.bit.DATA = 0; // Kick off SPI free-run

  // Set up analog output & timer ------------------------------------------

  analogWriteResolution(12);

  // Feed TIMER off GCLK1 (already set to 48 MHz by Arduino core)
  GCLK->PCHCTRL[TIMER_GCLK_ID].bit.CHEN = 0;    // Disable channel
  while(GCLK->PCHCTRL[TIMER_GCLK_ID].bit.CHEN); // Wait for disable
  GCLK_PCHCTRL_Type pchctrl;
  // Start from all-zero: the whole register is written below, so leaving
  // this local uninitialized would push indeterminate bits (write-lock
  // included) into PCHCTRL.
  pchctrl.reg      = 0;
  pchctrl.bit.GEN  = GCLK_PCHCTRL_GEN_GCLK1_Val;
  pchctrl.bit.CHEN = 1;
  GCLK->PCHCTRL[TIMER_GCLK_ID].reg = pchctrl.reg;
  while(!GCLK->PCHCTRL[TIMER_GCLK_ID].bit.CHEN); // Wait for enable

  // Disable timer before configuring it
  TIMER->COUNT16.CTRLA.bit.ENABLE = 0;
  while(TIMER->COUNT16.SYNCBUSY.bit.ENABLE);

  // 16-bit counter mode, 1:1 prescale, match-frequency generation mode
  TIMER->COUNT16.CTRLA.bit.MODE      = TC_CTRLA_MODE_COUNT16;
  TIMER->COUNT16.CTRLA.bit.PRESCALER = TC_CTRLA_PRESCALER_DIV1_Val;
  TIMER->COUNT16.WAVE.bit.WAVEGEN    = TC_WAVE_WAVEGEN_MFRQ_Val;

  TIMER->COUNT16.CTRLBCLR.reg = TC_CTRLBCLR_DIR; // Count up
  while(TIMER->COUNT16.SYNCBUSY.bit.CTRLB);

  voicePitch(1.0); // Set timer interval

  TIMER->COUNT16.INTENSET.reg = TC_INTENSET_OVF; // Overflow interrupt
  NVIC_DisableIRQ(TIMER_IRQN);
  NVIC_ClearPendingIRQ(TIMER_IRQN);
  NVIC_SetPriority(TIMER_IRQN, 0); // Top priority
  NVIC_EnableIRQ(TIMER_IRQN);

  TIMER->COUNT16.CTRLA.bit.ENABLE = 1;       // Enable timer
  while(TIMER->COUNT16.SYNCBUSY.bit.ENABLE); // Wait for it

  return true; // Success
}
// SET PITCH ---------------------------------------------------------------
// Set pitch adjustment, higher numbers = higher pitch. 0 < pitch < inf
// 0.5 = halve frequency (1 octave down)
// 1.0 = normal playback
// 2.0 = double frequency (1 octave up)
// Available pitch adjustment range depends on various hardware factors
// (SPI speed, timer/counter resolution, etc.), and the actual pitch
// adjustment (after appying constraints) will be returned.
float voicePitch(float p) {
float desiredPlaybackRate = sampleRate * p;
int32_t period = (int32_t)(48000000.0 / desiredPlaybackRate + 0.5);
if(period > 2500) period = 2500; // Hard limit is 65536, 2.5K is a practical limit
else if(period < 250) period = 250; // Leave some cycles for IRQ handler
TIMER->COUNT16.CC[0].reg = period - 1;
while(TIMER->COUNT16.SYNCBUSY.bit.CC0);
float actualPlaybackRate = 48000000.0 / (float)period;
p = (actualPlaybackRate / sampleRate); // New pitch
jumpThreshold = (int)(jump * p + 0.5);
return p;
}
// SET GAIN ----------------------------------------------------------------
// Set the microphone gain multiplier (1.0 = unity). The value is stored in
// micGain as 8.8 fixed point, rounded to nearest: negative inputs clamp to
// zero, and anything at or above 65535/256 clamps to the 16-bit maximum.
void voiceGain(float g) {
  uint16_t fixedGain;
  if(g < 0.0) {
    fixedGain = 0;                             // No negative gain
  } else if(g >= (65535.0/256.0)) {
    fixedGain = 65535;                         // Saturate at 16-bit ceiling
  } else {
    fixedGain = (uint16_t)(g * 256.0 + 0.5);   // Scale by 256 and round
  }
  micGain = fixedGain;
}
// INTERRUPT HANDLERS ------------------------------------------------------
// Weights for the 64 PDM bits that are combined into one audio sample.
// PDM_SERCOM_HANDLER adds entry [n] to its running sum whenever bit n of the
// first 32-bit word is set, and entry [32+n] for bit n of the second word.
// The weights total 65502, so a full sum always fits in 16 bits.
static uint16_t const sincfilter[64] = { 0, 2, 9, 21, 39, 63, 94, 132, 179, 236, 302, 379, 467, 565, 674, 792, 920, 1055, 1196, 1341, 1487, 1633, 1776, 1913, 2042, 2159, 2263, 2352, 2422, 2474, 2506, 2516, 2506, 2474, 2422, 2352, 2263, 2159, 2042, 1913, 1776, 1633, 1487, 1341, 1196, 1055, 920, 792, 674, 565, 467, 379, 302, 236, 179, 132, 94, 63, 39, 21, 9, 2, 0, 0 };
// PDM microphone interrupt (SPI data-register-empty). Each call fetches one
// 32-bit word of PDM bits and restarts the transfer. Two consecutive words
// are weighted by sincfilter[] and summed into one raw audio sample; on
// every second call that sample is DC-corrected, scaled by micGain, clipped
// to 16 bits and stored in recBuf[] and voiceLastReading.
void PDM_SERCOM_HANDLER(void) {
static bool evenWord = 1; // Alternates 0/1 with each interrupt call
static uint32_t sumTemp = 0; // Temp. value used across 2 interrupt calls
// Shenanigans: SPI data read/write are shadowed...even though it appears
// the same register here, it's legit to write new MOSI value before
// reading the received MISO value from the same location. This helps
// avoid a gap between words...provides a steady stream of bits.
*dataReg = 0; // Write clears DRE flag, starts next xfer
uint32_t sample = *dataReg; // Read last-received word
uint32_t sum = 0; // local var = register = faster than sumTemp
if(evenWord) { // Even-numbered 32-bit word...
// At default speed and optimization settings (120 MHz -Os), the PDM-
// servicing interrupt consumes about 12.5% of CPU time. Though this
// code looks bulky, it's actually reasonably efficient (sincfilter[] is
// const, so these compile down to constants, there is no array lookup,
// and any zero-value element refs will be removed by the compiler).
// Tested MANY methods and this was hard to beat. One managed just under
// 10% load, but required 4KB of tables...not worth it for small boost.
// Can get an easy boost with overclock and optimizer tweaks.
// First word: bit n of 'sample' is weighted by sincfilter[n].
if(sample & 0x00000001) sum += sincfilter[ 0];
if(sample & 0x00000002) sum += sincfilter[ 1];
if(sample & 0x00000004) sum += sincfilter[ 2];
if(sample & 0x00000008) sum += sincfilter[ 3];
if(sample & 0x00000010) sum += sincfilter[ 4];
if(sample & 0x00000020) sum += sincfilter[ 5];
if(sample & 0x00000040) sum += sincfilter[ 6];
if(sample & 0x00000080) sum += sincfilter[ 7];
if(sample & 0x00000100) sum += sincfilter[ 8];
if(sample & 0x00000200) sum += sincfilter[ 9];
if(sample & 0x00000400) sum += sincfilter[10];
if(sample & 0x00000800) sum += sincfilter[11];
if(sample & 0x00001000) sum += sincfilter[12];
if(sample & 0x00002000) sum += sincfilter[13];
if(sample & 0x00004000) sum += sincfilter[14];
if(sample & 0x00008000) sum += sincfilter[15];
if(sample & 0x00010000) sum += sincfilter[16];
if(sample & 0x00020000) sum += sincfilter[17];
if(sample & 0x00040000) sum += sincfilter[18];
if(sample & 0x00080000) sum += sincfilter[19];
if(sample & 0x00100000) sum += sincfilter[20];
if(sample & 0x00200000) sum += sincfilter[21];
if(sample & 0x00400000) sum += sincfilter[22];
if(sample & 0x00800000) sum += sincfilter[23];
if(sample & 0x01000000) sum += sincfilter[24];
if(sample & 0x02000000) sum += sincfilter[25];
if(sample & 0x04000000) sum += sincfilter[26];
if(sample & 0x08000000) sum += sincfilter[27];
if(sample & 0x10000000) sum += sincfilter[28];
if(sample & 0x20000000) sum += sincfilter[29];
if(sample & 0x40000000) sum += sincfilter[30];
if(sample & 0x80000000) sum += sincfilter[31];
sumTemp = sum; // Copy register to static var for next call
} else {
// Second word: bit n of 'sample' is weighted by sincfilter[32 + n].
if(sample & 0x00000001) sum += sincfilter[32];
if(sample & 0x00000002) sum += sincfilter[33];
if(sample & 0x00000004) sum += sincfilter[34];
if(sample & 0x00000008) sum += sincfilter[35];
if(sample & 0x00000010) sum += sincfilter[36];
if(sample & 0x00000020) sum += sincfilter[37];
if(sample & 0x00000040) sum += sincfilter[38];
if(sample & 0x00000080) sum += sincfilter[39];
if(sample & 0x00000100) sum += sincfilter[40];
if(sample & 0x00000200) sum += sincfilter[41];
if(sample & 0x00000400) sum += sincfilter[42];
if(sample & 0x00000800) sum += sincfilter[43];
if(sample & 0x00001000) sum += sincfilter[44];
if(sample & 0x00002000) sum += sincfilter[45];
if(sample & 0x00004000) sum += sincfilter[46];
if(sample & 0x00008000) sum += sincfilter[47];
if(sample & 0x00010000) sum += sincfilter[48];
if(sample & 0x00020000) sum += sincfilter[49];
if(sample & 0x00040000) sum += sincfilter[50];
if(sample & 0x00080000) sum += sincfilter[51];
if(sample & 0x00100000) sum += sincfilter[52];
if(sample & 0x00200000) sum += sincfilter[53];
if(sample & 0x00400000) sum += sincfilter[54];
if(sample & 0x00800000) sum += sincfilter[55];
if(sample & 0x01000000) sum += sincfilter[56];
if(sample & 0x02000000) sum += sincfilter[57];
if(sample & 0x04000000) sum += sincfilter[58];
if(sample & 0x08000000) sum += sincfilter[59];
if(sample & 0x10000000) sum += sincfilter[60];
if(sample & 0x20000000) sum += sincfilter[61];
if(sample & 0x40000000) sum += sincfilter[62];
if(sample & 0x80000000) sum += sincfilter[63];
sum += sumTemp; // Add static var from last call
// 'sum' is new raw audio value -- process it --------------------------
uint16_t dcOffset;
dcSum += sum; // Accumulate long-term average for DC offset correction
if(++dcCounter < DC_PERIOD) {
// Interpolate between dcOffsetPrior and dcOffsetNext
// (the operands promote to int, so a falling offset interpolates correctly)
dcOffset = dcOffsetPrior + (dcOffsetNext - dcOffsetPrior) * dcCounter / DC_PERIOD;
} else {
// End of period reached, move 'next' to 'previous,' calc new 'next' from avg
dcOffsetPrior = dcOffset = dcOffsetNext;
dcOffsetNext = dcSum / DC_PERIOD;
dcCounter = dcSum = 0;
}
// Adjust raw reading by DC offset to center (ish) it, scale by mic gain
// (micGain is 8.8 fixed point, hence the divide by 256)
int32_t adjusted = ((int32_t)sum - dcOffset) * micGain / 256;
// Go back to uint16_t space and clip to 16-bit range
adjusted += 32768;
if(adjusted > 65535) adjusted = 65535;
else if(adjusted < 0) adjusted = 0;
// So, the theory is, in the future some basic pitch detection could be
// added right about here, which could be used to improve the seam
// transitions in the playback interrupt (and possibly other things,
// like dynamic adjustment of the playback rate to do monotone and other
// effects). Actual usable pitch detection on speech turns out to be One
// Of Those Nearly Insurmountable Problems In Audio Processing...if
// you're thinking "oh just count the zero crossings" "just use an FFT"
// it's really not that simple, trust me, please, I've been reading
// everything on this, speech waveforms are jerks. Had the beginnings of
// some "maybe good enough approximation for a hacky microcontroller
// project" code here, but it's pulled out for now for the sake of
// getting something not-broken in folks' hands in a sensible timeframe.
// Advance the write position (wrapping at the end of the buffer) and store
// the sample in the new slot.
if(++recIndex >= recBufSize) recIndex = 0;
recBuf[recIndex] = adjusted;
// Outside code can use the value of voiceLastReading if you want to
// do an approximate live waveform display, or dynamic gain adjustment
// based on mic input, or other stuff. This won't give you every single
// sample in the recording buffer one-by-one sequentially...it's just
// the last thing that was stored prior to whatever time you polled it,
// but may still have some uses.
voiceLastReading = adjusted;
}
evenWord ^= 1; // Toggle so the next call handles the other word
}
// Playback timer interrupt
// Runs once per output sample: emits the value prepared on the previous
// call, then works out the next one from recBuf[]. When the playback and
// recording positions get within jumpThreshold of each other, playback is
// moved by 'jump' samples and the old and new positions are crossfaded so
// the seam doesn't click.
void TIMER_IRQ_HANDLER(void) {
  TIMER->COUNT16.INTFLAG.reg = TC_INTFLAG_OVF; // Acknowledge overflow

  // Output first, before any variable-length work, to keep timing steady
  analogWrite(A0, nextOut);
  analogWrite(A1, nextOut);

  // Step the read position around the circular buffer
  playbackIndex++;
  if(playbackIndex >= recBufSize) {
    playbackIndex = 0;
  }

  if(!jumping) {
    // Ordinary playback: reduce the 16-bit sample to 12 bits
    nextOut = recBuf[playbackIndex] >> 4;

    // Sped up: playback may catch the recording position, so measure how
    // far recording is ahead. Slowed down: recording may catch playback,
    // so measure how far playback is ahead. Either way the distance is
    // taken modulo the buffer length.
    const bool spedUp = (playbackRate >= sampleRate);
    int16_t    gap;
    if(spedUp) {
      gap = recIndex - playbackIndex;
    } else {
      gap = playbackIndex - recIndex;
    }
    if(gap < 0) {
      gap += recBufSize;
    }

    if(gap <= jumpThreshold) {
      if(spedUp) {
        // Back off by one jump length, wrapping below zero
        playbackIndexJumped = playbackIndex - jump;
        if(playbackIndexJumped < 0) {
          playbackIndexJumped += recBufSize;
        }
      } else {
        // Skip ahead by one jump length, wrapping past the end
        playbackIndexJumped = (playbackIndex + jump) % recBufSize;
      }
      jumping = true;
    }
    return;
  }

  // A waveform-blending transition is in progress. The two weights always
  // total 65536; each 16-bit sample times its weight, summed, stays within
  // 32 bits, and shifting right by 20 leaves a 12-bit output value.
  const uint32_t fadeIn  = 65536UL * jumpCount / jump; // jumped-to position
  const uint32_t fadeOut = 65536UL - fadeIn;           // current position
  nextOut = (recBuf[playbackIndexJumped] * fadeIn +
             recBuf[playbackIndex] * fadeOut) >> 20;

  jumpCount++;
  if(jumpCount < jump) {
    // Still fading: advance the second read position as well
    playbackIndexJumped++;
    if(playbackIndexJumped >= recBufSize) {
      playbackIndexJumped = 0;
    }
  } else {
    // Fade finished: continue from the jumped-to position
    playbackIndex = playbackIndexJumped;
    jumpCount     = 1;
    jumping       = false;
  }
}