Adafruit_Learning_System_Gu.../M4_Eyes/pdmvoice.cpp
2022-02-23 13:22:52 -05:00

261 lines
11 KiB
C++

// SPDX-FileCopyrightText: 2019 Phillip Burgess for Adafruit Industries
//
// SPDX-License-Identifier: MIT
// Basic voice changer code. This version is specific to the Adafruit
// MONSTER M4SK board using a PDM microphone.
#if defined(ADAFRUIT_MONSTER_M4SK_EXPRESS)
#include "globals.h"
#include <SPI.h>
#include <Adafruit_ZeroPDMSPI.h>
#define MIN_PITCH_HZ 65
#define MAX_PITCH_HZ 1600
#define TYP_PITCH_HZ 175
static void voiceOutCallback(void);
static float actualPlaybackRate;
// PDM mic allows 1.0 to 3.25 MHz max clock (2.4 typical).
// SPI native max is is 24 MHz, so available speeds are 12, 6, 3 MHz.
#define SPI_BITRATE 3000000
// 3 MHz / 32 bits = 93,750 Hz interrupt frequency
// 2 interrupts/sample = 46,875 Hz audio sample rate
const float sampleRate = (float)SPI_BITRATE / 64.0;
// sampleRate is float in case factors change to make it not divide evenly.
// It DOES NOT CHANGE over time, only playbackRate does.
// Although SPI lib now has an option to get an SPI object's SERCOM number
// at run time, the interrupt handler MUST be declared at compile time...
// so it's necessary to know the SERCOM # ahead of time anyway, oh well.
#define PDM_SPI SPI2 // PDM mic SPI peripheral
#define PDM_SERCOM_HANDLER SERCOM3_0_Handler
Adafruit_ZeroPDMSPI pdmspi(&PDM_SPI);
static float playbackRate = sampleRate;
static uint16_t *recBuf = NULL;
// recBuf currently gets allocated (in voiceSetup()) for two full cycles of
// the lowest pitch we're likely to encounter. Right now it doesn't really
// NEED to be this size, but if pitch detection is added in the future then
// this'll become more useful.
// 46,875 sampling rate from mic, 65 Hz lowest pitch -> 2884 bytes.
static const uint16_t recBufSize = (uint16_t)(sampleRate / (float)MIN_PITCH_HZ * 2.0 + 0.5);
static int16_t recIndex = 0;
static int16_t playbackIndex = 0;
volatile uint16_t voiceLastReading = 32768;
volatile uint16_t voiceMin = 32768;
volatile uint16_t voiceMax = 32768;
#define MOD_MIN 20 // Lowest supported modulation frequency (lower = more RAM use)
static uint8_t modWave = 0; // Modulation wave type (none, sine, square, tri, saw)
static uint8_t *modBuf = NULL; // Modulation waveform buffer
static uint32_t modIndex = 0; // Current position in modBuf
static uint32_t modLen = 0; // Currently used amount of modBuf based on modFreq
// Just playing back directly from the recording circular buffer produces
// audible clicks as the waveforms rarely align at the beginning and end of
// the buffer. So what we do is advance or push back the playback index a
// certain amount when it's likely to overtake or underflow the recording
// index, and interpolate from the current to the jumped-forward-or-back
// readings over a short period. In a perfect world, that "certain amount"
// would be one wavelength of the current voice pitch...BUT...with no pitch
// detecton currently, we instead use a fixed middle-of-the-road value:
// TYP_PITCH_HZ, 175 by default, which is a bit below typical female spoken
// vocal range and a bit above typical male spoken range. This all goes out
// the window with singing, and of course young people will have a higher
// speech range, is just a crude catch-all approximation.
static const uint16_t jump = (int)(sampleRate / (float)TYP_PITCH_HZ + 0.5);
static const uint16_t interp = jump / 4; // Interp time = 1/4 waveform
static bool jumping = false;
static uint16_t jumpCount = 1;
static int16_t jumpThreshold;
static int16_t playbackIndexJumped;
static uint16_t nextOut = 2048;
float voicePitch(float p);
// START PITCH SHIFT (no arguments) ----------------------------------------
bool voiceSetup(bool modEnable) {
// Allocate circular buffer for audio
if(NULL == (recBuf = (uint16_t *)malloc(recBufSize * sizeof(uint16_t)))) {
return false; // Fail
}
// Allocate buffer for voice modulation, if enabled
if(modEnable) {
// 250 comes from min period in voicePitch()
modBuf = (uint8_t *)malloc((int)(48000000.0 / 250.0 / MOD_MIN + 0.5));
// If malloc fails, program will continue without modulation
}
pdmspi.begin(sampleRate); // Set up PDM microphone
analogWriteResolution(12); // Set up analog output
voicePitch(1.0); // Set timer interval
return true; // Success
}
// SET PITCH ---------------------------------------------------------------
// Set pitch adjustment, higher numbers = higher pitch. 0 < pitch < inf
// 0.5 = halve frequency (1 octave down)
// 1.0 = normal playback
// 2.0 = double frequency (1 octave up)
// Available pitch adjustment range depends on various hardware factors
// (SPI speed, timer/counter resolution, etc.), and the actual pitch
// adjustment (after appying constraints) will be returned.
float voicePitch(float p) {
float desiredPlaybackRate = sampleRate * p;
// Clip to sensible range
if(desiredPlaybackRate < 19200) desiredPlaybackRate = 19200; // ~0.41X
else if(desiredPlaybackRate > 192000) desiredPlaybackRate = 192000; // ~4.1X
arcada.timerCallback(desiredPlaybackRate, voiceOutCallback);
// Making this assumption here knowing Arcada will use 1:1 prescale:
int32_t period = (int32_t)(48000000.0 / desiredPlaybackRate);
actualPlaybackRate = 48000000.0 / (float)period;
p = (actualPlaybackRate / sampleRate); // New pitch
jumpThreshold = (int)(jump * p + 0.5);
return p;
}
// SET GAIN ----------------------------------------------------------------
void voiceGain(float g) {
pdmspi.setMicGain(g); // Handles its own clipping
}
// SET MODULATION ----------------------------------------------------------
// This needs to be called after any call to voicePitch() -- the modulation
// table is not currently auto-regenerated. Maybe that'll change.
void voiceMod(uint32_t freq, uint8_t waveform) {
if(modBuf) { // Ignore if no modulation buffer allocated
if(freq < MOD_MIN) freq = MOD_MIN;
modLen = (uint32_t)(actualPlaybackRate / freq + 0.5);
if(modLen < 2) modLen = 2;
if(waveform > 4) waveform = 4;
modWave = waveform;
yield();
switch(waveform) {
case 0: // None
break;
case 1: // Square
memset(modBuf, 255, modLen / 2);
memset(&modBuf[modLen / 2], 0, modLen - modLen / 2);
break;
case 2: // Sine
for(uint32_t i=0; i<modLen; i++) {
modBuf[i] = (int)((sin(M_PI * 2.0 * (float)i / (float)modLen) + 1.0) * 0.5 * 255.0 + 0.5);
}
break;
case 3: // Triangle
for(uint32_t i=0; i<modLen; i++) {
modBuf[i] = (int)(fabs(0.5 - (float)i / (float)modLen) * 2.0 * 255.0 + 0.5);
}
break;
case 4: // Sawtooth (increasing)
for(uint32_t i=0; i<modLen; i++) {
modBuf[i] = (int)((float)i / (float)(modLen - 1) * 255.0 + 0.5);
}
break;
}
}
}
// INTERRUPT HANDLERS ------------------------------------------------------
void PDM_SERCOM_HANDLER(void) {
uint16_t micReading = 0;
if(pdmspi.decimateFilterWord(&micReading, true)) {
// So, the theory is, in the future some basic pitch detection could be
// added right about here, which could be used to improve the seam
// transitions in the playback interrupt (and possibly other things,
// like dynamic adjustment of the playback rate to do monotone and other
// effects). Actual usable pitch detection on speech turns out to be One
// Of Those Nearly Insurmountable Problems In Audio Processing...if
// you're thinking "oh just count the zero crossings" "just use an FFT"
// it's really not that simple, trust me, please, I've been reading
// everything on this, speech waveforms are jerks. Had the beginnings of
// some "maybe good enough approximation for a hacky microcontroller
// project" code here, but it's pulled out for now for the sake of
// getting something not-broken in folks' hands in a sensible timeframe.
if(++recIndex >= recBufSize) recIndex = 0;
recBuf[recIndex] = micReading;
// Outside code can use the value of voiceLastReading if you want to
// do an approximate live waveform display, or dynamic gain adjustment
// based on mic input, or other stuff. This won't give you every single
// sample in the recording buffer one-by-one sequentially...it's just
// the last thing that was stored prior to whatever time you polled it,
// but may still have some uses.
voiceLastReading = micReading;
// Similarly, user code can extern these variables and monitor the
// peak-to-peak range. They are never reset in the voice code itself,
// it's the duty of the user code to reset both to 32768 periodically.
if(micReading < voiceMin) voiceMin = micReading;
else if(micReading > voiceMax) voiceMax = micReading;
}
}
static void voiceOutCallback(void) {
// Modulation is done on the output (rather than the input) because
// pitch-shifting modulated input would cause weird waveform
// discontinuities. This does require recalculating the modulation table
// any time the pitch changes though.
if(modWave) {
nextOut = (((int32_t)nextOut - 2048) * (modBuf[modIndex] + 1) / 256) + 2048;
if(++modIndex >= modLen) modIndex = 0;
}
// Do analog writes pronto so output timing is consistent
analogWrite(A0, nextOut);
analogWrite(A1, nextOut);
// Then we can take whatever variable time for processing the next cycle...
if(++playbackIndex >= recBufSize) playbackIndex = 0;
if(jumping) {
// A waveform-blending transition is in-progress
uint32_t w1 = 65536UL * jumpCount / jump, // ramp playbackIndexJumped up (14 bits)
w2 = 65536UL - w1; // ramp playbackIndex down (14 bits)
nextOut = (recBuf[playbackIndexJumped] * w1 + recBuf[playbackIndex] * w2) >> 20; // 28 bit result->12 bits
if(++jumpCount >= jump) {
playbackIndex = playbackIndexJumped;
jumpCount = 1;
jumping = false;
} else {
if(++playbackIndexJumped >= recBufSize) playbackIndexJumped = 0;
}
} else {
nextOut = recBuf[playbackIndex] >> 4; // 16->12 bit
if(playbackRate >= sampleRate) { // Sped up
// Playback may overtake recording, need to back off periodically
int16_t dist = (recIndex >= playbackIndex) ?
(recIndex - playbackIndex) : (recBufSize - (playbackIndex - recIndex));
if(dist <= jumpThreshold) {
playbackIndexJumped = playbackIndex - jump;
if(playbackIndexJumped < 0) playbackIndexJumped += recBufSize;
jumping = true;
}
} else { // Slowed down
// Playback may underflow recording, need to advance periodically
int16_t dist = (playbackIndex >= recIndex) ?
(playbackIndex - recIndex) : (recBufSize - 1 - (recIndex - playbackIndex));
if(dist <= jumpThreshold) {
playbackIndexJumped = (playbackIndex + jump) % recBufSize;
jumping = true;
}
}
}
}
#endif // ADAFRUIT_MONSTER_M4SK_EXPRESS