261 lines
11 KiB
C++
261 lines
11 KiB
C++
// SPDX-FileCopyrightText: 2019 Phillip Burgess for Adafruit Industries
|
|
//
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
// Basic voice changer code. This version is specific to the Adafruit
|
|
// MONSTER M4SK board using a PDM microphone.
|
|
|
|
#if defined(ADAFRUIT_MONSTER_M4SK_EXPRESS)
|
|
|
|
#include "globals.h"
|
|
#include <SPI.h>
|
|
#include <Adafruit_ZeroPDMSPI.h>
|
|
|
|
#define MIN_PITCH_HZ 65
|
|
#define MAX_PITCH_HZ 1600
|
|
#define TYP_PITCH_HZ 175
|
|
|
|
static void voiceOutCallback(void);
|
|
static float actualPlaybackRate;
|
|
|
|
// PDM mic allows 1.0 to 3.25 MHz max clock (2.4 typical).
|
|
// SPI native max is is 24 MHz, so available speeds are 12, 6, 3 MHz.
|
|
#define SPI_BITRATE 3000000
|
|
// 3 MHz / 32 bits = 93,750 Hz interrupt frequency
|
|
// 2 interrupts/sample = 46,875 Hz audio sample rate
|
|
const float sampleRate = (float)SPI_BITRATE / 64.0;
|
|
// sampleRate is float in case factors change to make it not divide evenly.
|
|
// It DOES NOT CHANGE over time, only playbackRate does.
|
|
|
|
// Although SPI lib now has an option to get an SPI object's SERCOM number
|
|
// at run time, the interrupt handler MUST be declared at compile time...
|
|
// so it's necessary to know the SERCOM # ahead of time anyway, oh well.
|
|
#define PDM_SPI SPI2 // PDM mic SPI peripheral
|
|
#define PDM_SERCOM_HANDLER SERCOM3_0_Handler
|
|
|
|
Adafruit_ZeroPDMSPI pdmspi(&PDM_SPI);
|
|
|
|
static float playbackRate = sampleRate;
|
|
static uint16_t *recBuf = NULL;
|
|
// recBuf currently gets allocated (in voiceSetup()) for two full cycles of
|
|
// the lowest pitch we're likely to encounter. Right now it doesn't really
|
|
// NEED to be this size, but if pitch detection is added in the future then
|
|
// this'll become more useful.
|
|
// 46,875 sampling rate from mic, 65 Hz lowest pitch -> 2884 bytes.
|
|
static const uint16_t recBufSize = (uint16_t)(sampleRate / (float)MIN_PITCH_HZ * 2.0 + 0.5);
|
|
static int16_t recIndex = 0;
|
|
static int16_t playbackIndex = 0;
|
|
|
|
volatile uint16_t voiceLastReading = 32768;
|
|
volatile uint16_t voiceMin = 32768;
|
|
volatile uint16_t voiceMax = 32768;
|
|
|
|
#define MOD_MIN 20 // Lowest supported modulation frequency (lower = more RAM use)
|
|
static uint8_t modWave = 0; // Modulation wave type (none, sine, square, tri, saw)
|
|
static uint8_t *modBuf = NULL; // Modulation waveform buffer
|
|
static uint32_t modIndex = 0; // Current position in modBuf
|
|
static uint32_t modLen = 0; // Currently used amount of modBuf based on modFreq
|
|
|
|
// Just playing back directly from the recording circular buffer produces
|
|
// audible clicks as the waveforms rarely align at the beginning and end of
|
|
// the buffer. So what we do is advance or push back the playback index a
|
|
// certain amount when it's likely to overtake or underflow the recording
|
|
// index, and interpolate from the current to the jumped-forward-or-back
|
|
// readings over a short period. In a perfect world, that "certain amount"
|
|
// would be one wavelength of the current voice pitch...BUT...with no pitch
|
|
// detecton currently, we instead use a fixed middle-of-the-road value:
|
|
// TYP_PITCH_HZ, 175 by default, which is a bit below typical female spoken
|
|
// vocal range and a bit above typical male spoken range. This all goes out
|
|
// the window with singing, and of course young people will have a higher
|
|
// speech range, is just a crude catch-all approximation.
|
|
static const uint16_t jump = (int)(sampleRate / (float)TYP_PITCH_HZ + 0.5);
|
|
static const uint16_t interp = jump / 4; // Interp time = 1/4 waveform
|
|
static bool jumping = false;
|
|
static uint16_t jumpCount = 1;
|
|
static int16_t jumpThreshold;
|
|
static int16_t playbackIndexJumped;
|
|
static uint16_t nextOut = 2048;
|
|
|
|
float voicePitch(float p);
|
|
|
|
// START PITCH SHIFT (no arguments) ----------------------------------------
|
|
|
|
bool voiceSetup(bool modEnable) {
|
|
|
|
// Allocate circular buffer for audio
|
|
if(NULL == (recBuf = (uint16_t *)malloc(recBufSize * sizeof(uint16_t)))) {
|
|
return false; // Fail
|
|
}
|
|
|
|
// Allocate buffer for voice modulation, if enabled
|
|
if(modEnable) {
|
|
// 250 comes from min period in voicePitch()
|
|
modBuf = (uint8_t *)malloc((int)(48000000.0 / 250.0 / MOD_MIN + 0.5));
|
|
// If malloc fails, program will continue without modulation
|
|
}
|
|
|
|
pdmspi.begin(sampleRate); // Set up PDM microphone
|
|
analogWriteResolution(12); // Set up analog output
|
|
voicePitch(1.0); // Set timer interval
|
|
|
|
return true; // Success
|
|
}
|
|
|
|
// SET PITCH ---------------------------------------------------------------
|
|
|
|
// Set pitch adjustment, higher numbers = higher pitch. 0 < pitch < inf
|
|
// 0.5 = halve frequency (1 octave down)
|
|
// 1.0 = normal playback
|
|
// 2.0 = double frequency (1 octave up)
|
|
// Available pitch adjustment range depends on various hardware factors
|
|
// (SPI speed, timer/counter resolution, etc.), and the actual pitch
|
|
// adjustment (after appying constraints) will be returned.
|
|
float voicePitch(float p) {
|
|
float desiredPlaybackRate = sampleRate * p;
|
|
// Clip to sensible range
|
|
if(desiredPlaybackRate < 19200) desiredPlaybackRate = 19200; // ~0.41X
|
|
else if(desiredPlaybackRate > 192000) desiredPlaybackRate = 192000; // ~4.1X
|
|
arcada.timerCallback(desiredPlaybackRate, voiceOutCallback);
|
|
// Making this assumption here knowing Arcada will use 1:1 prescale:
|
|
int32_t period = (int32_t)(48000000.0 / desiredPlaybackRate);
|
|
actualPlaybackRate = 48000000.0 / (float)period;
|
|
p = (actualPlaybackRate / sampleRate); // New pitch
|
|
jumpThreshold = (int)(jump * p + 0.5);
|
|
return p;
|
|
}
|
|
|
|
// SET GAIN ----------------------------------------------------------------
|
|
|
|
void voiceGain(float g) {
|
|
pdmspi.setMicGain(g); // Handles its own clipping
|
|
}
|
|
|
|
// SET MODULATION ----------------------------------------------------------
|
|
|
|
// This needs to be called after any call to voicePitch() -- the modulation
|
|
// table is not currently auto-regenerated. Maybe that'll change.
|
|
|
|
void voiceMod(uint32_t freq, uint8_t waveform) {
|
|
if(modBuf) { // Ignore if no modulation buffer allocated
|
|
if(freq < MOD_MIN) freq = MOD_MIN;
|
|
modLen = (uint32_t)(actualPlaybackRate / freq + 0.5);
|
|
if(modLen < 2) modLen = 2;
|
|
if(waveform > 4) waveform = 4;
|
|
modWave = waveform;
|
|
yield();
|
|
switch(waveform) {
|
|
case 0: // None
|
|
break;
|
|
case 1: // Square
|
|
memset(modBuf, 255, modLen / 2);
|
|
memset(&modBuf[modLen / 2], 0, modLen - modLen / 2);
|
|
break;
|
|
case 2: // Sine
|
|
for(uint32_t i=0; i<modLen; i++) {
|
|
modBuf[i] = (int)((sin(M_PI * 2.0 * (float)i / (float)modLen) + 1.0) * 0.5 * 255.0 + 0.5);
|
|
}
|
|
break;
|
|
case 3: // Triangle
|
|
for(uint32_t i=0; i<modLen; i++) {
|
|
modBuf[i] = (int)(fabs(0.5 - (float)i / (float)modLen) * 2.0 * 255.0 + 0.5);
|
|
}
|
|
break;
|
|
case 4: // Sawtooth (increasing)
|
|
for(uint32_t i=0; i<modLen; i++) {
|
|
modBuf[i] = (int)((float)i / (float)(modLen - 1) * 255.0 + 0.5);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// INTERRUPT HANDLERS ------------------------------------------------------
|
|
|
|
void PDM_SERCOM_HANDLER(void) {
|
|
uint16_t micReading = 0;
|
|
if(pdmspi.decimateFilterWord(&micReading, true)) {
|
|
// So, the theory is, in the future some basic pitch detection could be
|
|
// added right about here, which could be used to improve the seam
|
|
// transitions in the playback interrupt (and possibly other things,
|
|
// like dynamic adjustment of the playback rate to do monotone and other
|
|
// effects). Actual usable pitch detection on speech turns out to be One
|
|
// Of Those Nearly Insurmountable Problems In Audio Processing...if
|
|
// you're thinking "oh just count the zero crossings" "just use an FFT"
|
|
// it's really not that simple, trust me, please, I've been reading
|
|
// everything on this, speech waveforms are jerks. Had the beginnings of
|
|
// some "maybe good enough approximation for a hacky microcontroller
|
|
// project" code here, but it's pulled out for now for the sake of
|
|
// getting something not-broken in folks' hands in a sensible timeframe.
|
|
if(++recIndex >= recBufSize) recIndex = 0;
|
|
recBuf[recIndex] = micReading;
|
|
|
|
// Outside code can use the value of voiceLastReading if you want to
|
|
// do an approximate live waveform display, or dynamic gain adjustment
|
|
// based on mic input, or other stuff. This won't give you every single
|
|
// sample in the recording buffer one-by-one sequentially...it's just
|
|
// the last thing that was stored prior to whatever time you polled it,
|
|
// but may still have some uses.
|
|
voiceLastReading = micReading;
|
|
|
|
// Similarly, user code can extern these variables and monitor the
|
|
// peak-to-peak range. They are never reset in the voice code itself,
|
|
// it's the duty of the user code to reset both to 32768 periodically.
|
|
if(micReading < voiceMin) voiceMin = micReading;
|
|
else if(micReading > voiceMax) voiceMax = micReading;
|
|
}
|
|
}
|
|
|
|
static void voiceOutCallback(void) {
|
|
|
|
// Modulation is done on the output (rather than the input) because
|
|
// pitch-shifting modulated input would cause weird waveform
|
|
// discontinuities. This does require recalculating the modulation table
|
|
// any time the pitch changes though.
|
|
if(modWave) {
|
|
nextOut = (((int32_t)nextOut - 2048) * (modBuf[modIndex] + 1) / 256) + 2048;
|
|
if(++modIndex >= modLen) modIndex = 0;
|
|
}
|
|
|
|
// Do analog writes pronto so output timing is consistent
|
|
analogWrite(A0, nextOut);
|
|
analogWrite(A1, nextOut);
|
|
// Then we can take whatever variable time for processing the next cycle...
|
|
|
|
if(++playbackIndex >= recBufSize) playbackIndex = 0;
|
|
|
|
if(jumping) {
|
|
// A waveform-blending transition is in-progress
|
|
uint32_t w1 = 65536UL * jumpCount / jump, // ramp playbackIndexJumped up (14 bits)
|
|
w2 = 65536UL - w1; // ramp playbackIndex down (14 bits)
|
|
nextOut = (recBuf[playbackIndexJumped] * w1 + recBuf[playbackIndex] * w2) >> 20; // 28 bit result->12 bits
|
|
if(++jumpCount >= jump) {
|
|
playbackIndex = playbackIndexJumped;
|
|
jumpCount = 1;
|
|
jumping = false;
|
|
} else {
|
|
if(++playbackIndexJumped >= recBufSize) playbackIndexJumped = 0;
|
|
}
|
|
} else {
|
|
nextOut = recBuf[playbackIndex] >> 4; // 16->12 bit
|
|
if(playbackRate >= sampleRate) { // Sped up
|
|
// Playback may overtake recording, need to back off periodically
|
|
int16_t dist = (recIndex >= playbackIndex) ?
|
|
(recIndex - playbackIndex) : (recBufSize - (playbackIndex - recIndex));
|
|
if(dist <= jumpThreshold) {
|
|
playbackIndexJumped = playbackIndex - jump;
|
|
if(playbackIndexJumped < 0) playbackIndexJumped += recBufSize;
|
|
jumping = true;
|
|
}
|
|
} else { // Slowed down
|
|
// Playback may underflow recording, need to advance periodically
|
|
int16_t dist = (playbackIndex >= recIndex) ?
|
|
(playbackIndex - recIndex) : (recBufSize - 1 - (recIndex - playbackIndex));
|
|
if(dist <= jumpThreshold) {
|
|
playbackIndexJumped = (playbackIndex + jump) % recBufSize;
|
|
jumping = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif // ADAFRUIT_MONSTER_M4SK_EXPRESS
|