# Adafruit_Learning_System_Guides/LittleConnectionMachine/RaspberryPi/audio.py
# SPDX-FileCopyrightText: 2022 Phillip Burgess for Adafruit Industries
#
# SPDX-License-Identifier: MIT
"""
Audio spectrum display for Little Connection Machine. This is designed to be
fun to look at, not a Serious Audio Tool(tm). Requires USB microphone & ALSA
config. Prerequisite libraries include PyAudio and NumPy:
sudo apt-get install libatlas-base-dev libportaudio2
pip3 install numpy pyaudio
See the following for ALSA config (use Stretch directions):
learn.adafruit.com/usb-audio-cards-with-a-raspberry-pi/updating-alsa-config
"""
import math
import time
import numpy as np
import pyaudio
from cm1 import CM1
# FFT configurables. These numbers are 'hard,' actual figures:
RATE = 11025 # For audio vis, don't want or need high sample rate!
FFT_SIZE = 128 # Audio samples to read per frame (for FFT input)
ROWS = 32 # FFT output filtered down to this many 'buckets'
# Then things start getting subjective. For example, the lower and upper
# ends of the FFT output don't make a good contribution to the resulting
# graph...either too noisy, or out of musical range. Clip a range
# between 0 and FFT_SIZE-1. These aren't hard science, they were determined
# by playing various music and seeing what looked good:
LEAST = 1 # Lowest bin of FFT output to use
MOST = 111 # Highest bin of FFT output to use
# And moreso. Normally, FFT results are linearly spaced by frequency,
# and with music this results in a crowded low end and sparse high end.
# The visualizer reformats this logarithmically so octaves are linearly
# spaced...the low end is expanded, upper end compressed. But just picking
# individual FFT bins will cause visual dropouts. Instead, a number of
# inputs are merged into each output, and because of the logarithmic scale,
# that number needs to be focused near the low end and spread out among
# many samples toward the top. Again, not scientific, these were derived
# empirically by throwing music at it and adjusting:
FIRST_WIDTH = 2 # Width of sampling curve at low end
LAST_WIDTH = 40 # Width of sampling curve at high end
# Except for ROWS above, none of this is involved in the actual rendering
# of the graph, just how the data is massaged. If modifying this for your
# own FFT-based visualizer, you could keep this around and just change the
# drawing parts of the main loop.
class AudioSpectrum(CM1):
    """Audio spectrum display for Little Connection Machine.

    Reads mono 16-bit audio from the default input device, runs a small
    FFT, merges the result into ROWS logarithmically-spaced buckets, and
    renders each bucket as a mirrored horizontal bar (with a trailing
    "peak dot") on the CM1 display.
    """

    # pylint: disable=too-many-locals
    def __init__(self, *args, **kwargs):
        """Open the audio input stream and precompute the per-row FFT
        weighting tables. Positional/keyword args pass through to CM1."""
        super().__init__(*args, **kwargs)  # CM1 base initialization
        # Access USB mic via PyAudio
        audio = pyaudio.PyAudio()
        self.stream = audio.open(
            format=pyaudio.paInt16,  # 16-bit int
            channels=1,  # Mono
            rate=RATE,
            input=True,
            output=False,
            frames_per_buffer=FFT_SIZE,
        )
        # Precompute a few items for the math to follow
        first_center_log = math.log2(LEAST + 0.5)
        center_log_spread = math.log2(MOST + 0.5) - first_center_log
        width_low_log = math.log2(FIRST_WIDTH)
        width_log_spread = math.log2(LAST_WIDTH) - width_low_log
        # As mentioned earlier, each row of the graph is filtered down from
        # multiple FFT elements. These lists are involved in that filtering,
        # each has one item per row of output:
        self.low_bin = []  # First FFT bin that contributes to row
        self.bin_weight = []  # List of subsequent FFT element weightings
        self.bin_sum = []  # Precomputed sum of bin_weight for row
        self.noise = []  # Subtracted from each row's FFT total in run()
        for row in range(ROWS):  # For each row...
            # Calc center & spread of cubic curve for bin weighting.
            # Centers/widths are linear in log2 space, so each output row
            # covers an equal slice of octaves rather than of frequency.
            center_log = first_center_log + center_log_spread * row / (ROWS - 1)
            center_linear = 2**center_log
            width_log = width_low_log + width_log_spread * row / (ROWS - 1)
            width_linear = 2**width_log
            half_width = width_linear * 0.5
            lower = center_linear - half_width
            upper = center_linear + half_width
            low_bin = int(lower)  # First FFT element to use
            hi_bin = min(FFT_SIZE - 1, int(upper))  # Last "
            weights = []  # FFT weights for row
            for bin_num in range(low_bin, hi_bin + 1):
                bin_center = bin_num + 0.5
                dist = abs(bin_center - center_linear) / half_width
                if dist < 1.0:  # Filter out math stragglers at either end
                    # Bin weights have a cubic falloff curve within range:
                    dist = 1.0 - dist  # Invert dist so 1.0 is at center
                    weight = ((3.0 - (dist * 2.0)) * dist) * dist
                    weights.append(weight)
            self.bin_weight.append(weights)  # Save list of weights for row
            self.bin_sum.append(sum(weights))  # And sum of weights
            self.low_bin.append(low_bin)  # And first FFT bin index
            # FFT output always has a little "sparkle" due to ambient hum.
            # Subtracting a bit helps. Noise varies per element, more at low
            # end...this table is just a non-scientific fudge factor...
            self.noise.append(int(2.4 ** (4 - 4 * row / ROWS)))

    def run(self):
        """Main loop for audio visualizer. Runs until interrupted."""
        # Some tables associated with each row of the display. These are
        # visualizer specific, not part of the FFT processing, so they're
        # here instead of part of the class above.
        width = [0 for _ in range(ROWS)]  # Current row width
        peak = [0 for _ in range(ROWS)]  # Recent row peak
        dropv = [0.0 for _ in range(ROWS)]  # Current peak falling speed
        autolevel = [32.0 for _ in range(ROWS)]  # Per-row auto adjust
        start_time = time.monotonic()
        frames = 0
        while True:
            # Read bytes from PyAudio stream, convert to int16, process
            # via NumPy's FFT function...
            data_8 = self.stream.read(FFT_SIZE * 2, exception_on_overflow=False)
            data_16 = np.frombuffer(data_8, np.int16)
            fft_out = np.fft.fft(data_16, norm="ortho")
            # fft_out will have FFT_SIZE * 2 elements, mirrored at center.
            # Get spectrum of first half. Instead of square root for
            # magnitude, use something between square and cube root.
            # No scientific reason, just looked good.
            spec_y = [
                (c.real * c.real + c.imag * c.imag) ** 0.4 for c in fft_out[0:FFT_SIZE]
            ]
            self.clear()  # Clear canvas before drawing
            for row in range(ROWS):  # Low to high freq...
                # Weigh & sum up all the FFT outputs affecting this row
                total = 0
                for idx, weight in enumerate(self.bin_weight[row]):
                    total += (spec_y[self.low_bin[row] + idx]) * weight
                total /= self.bin_sum[row]
                # Subtract this row's ambient-noise fudge factor (table
                # precomputed in __init__), clamping at zero so quiet
                # rows can't go negative.
                total = max(0, total - self.noise[row])
                # Auto-leveling is intended to make each column 'pop'.
                # When a particular column isn't getting a lot of input
                # from the FFT, gradually boost that column's sensitivity.
                if total > autolevel[row]:  # New level is louder
                    # Make autolevel rise quickly if column total exceeds it
                    autolevel[row] = autolevel[row] * 0.25 + total * 0.75
                else:  # New level is softer
                    # And fall slowly otherwise
                    autolevel[row] = autolevel[row] * 0.98 + total * 0.02
                # Autolevel limit keeps things from getting TOO boosty.
                # Trial and error, no science to this number.
                autolevel[row] = max(autolevel[row], 20)
                # Apply autoleveling to weighted input.
                # This is the prelim. row width before further filtering...
                total *= 18 / autolevel[row]  # 18 is 1/2 display width
                # ...then filter the column width computed above
                if total > width[row]:
                    # If it's greater than this column's current width,
                    # move column's width quickly in that direction
                    width[row] = width[row] * 0.3 + total * 0.7
                else:
                    # If less, move slowly down
                    width[row] = width[row] * 0.5 + total * 0.5
                # Compute "peak dots," which sort of show the recent
                # peak level for each column (mostly just neat to watch).
                if width[row] > peak[row]:
                    # If column exceeds old peak, move peak immediately,
                    # give it a slight upward boost (negative fall speed).
                    dropv[row] = (peak[row] - width[row]) * 0.07
                    peak[row] = min(width[row], 18)
                else:
                    # Otherwise, peak gradually accelerates down
                    dropv[row] += 0.2
                    peak[row] -= dropv[row]
                # Draw bar for this row. It's done as a gradient,
                # bright toward center, dim toward edge.
                iwidth = int(width[row] + 0.5)  # Integer width
                drow = ROWS - 1 - row  # Display row, reverse of freq row
                if iwidth > 0:
                    iwidth = min(iwidth, 18)  # Clip to 18 pixels
                    scale = self.brightness * iwidth / 18  # Center brightness
                    for col in range(iwidth):
                        level = int(scale * ((1.0 - col / iwidth) ** 2.6))
                        self.draw.point([17 - col, drow], fill=level)
                        self.draw.point([18 + col, drow], fill=level)
                # Draw peak dot
                if peak[row] > 0:
                    col = int(peak[row] + 0.5)
                    self.draw.point([17 - col, drow], fill=self.brightness)
                    self.draw.point([18 + col, drow], fill=self.brightness)
            # Update matrices and show est. frames/second
            self.redraw()
            frames += 1
            elapsed = time.monotonic() - start_time
            print(frames / elapsed)
if __name__ == "__main__":
    # Construct the visualizer (opens the mic stream in __init__ above)
    # and hand control to the CM1 base class's process() entry point.
    AudioSpectrum().process()