# SPDX-FileCopyrightText: 2022 Phillip Burgess for Adafruit Industries
#
# SPDX-License-Identifier: MIT

"""
|
|
Audio spectrum display for Little Connection Machine. This is designed to be
|
|
fun to look at, not a Serious Audio Tool(tm). Requires USB microphone & ALSA
|
|
config. Prerequisite libraries include PyAudio and NumPy:
|
|
sudo apt-get install libatlas-base-dev libportaudio2
|
|
pip3 install numpy pyaudio
|
|
See the following for ALSA config (use Stretch directions):
|
|
learn.adafruit.com/usb-audio-cards-with-a-raspberry-pi/updating-alsa-config
|
|
"""
|
|
|
|
import math
import time
import numpy as np
import pyaudio
from cm1 import CM1

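# The CM1 base class (from the companion cm1 module) supplies the canvas
# helpers used below: clear(), draw, redraw() and brightness, plus the
# process() entry point, which is expected to invoke run().
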
# FFT configurables. These numbers are 'hard,' actual figures:
RATE = 11025     # For audio vis, don't want or need high sample rate!
FFT_SIZE = 128   # Audio samples to read per frame (for FFT input)
ROWS = 32        # FFT output filtered down to this many 'buckets'
# Then things start getting subjective. For example, the lower and upper
# ends of the FFT output don't make a good contribution to the resulting
# graph...either too noisy, or out of musical range. Clip a range between
# 0 and FFT_SIZE-1. These aren't hard science, they were determined by
# playing various music and seeing what looked good:
LEAST = 1        # Lowest bin of FFT output to use
MOST = 111       # Highest bin of FFT output to use
# And more so. Normally, FFT results are linearly spaced by frequency,
# and with music this results in a crowded low end and sparse high end.
# The visualizer reformats this logarithmically so octaves are linearly
# spaced...the low end is expanded, upper end compressed. But just picking
# individual FFT bins will cause visual dropouts. Instead, a number of
# inputs are merged into each output, and because of the logarithmic scale,
# that number needs to be focused near the low end and spread out among
# many samples toward the top. Again, not scientific, these were derived
# empirically by throwing music at it and adjusting:
FIRST_WIDTH = 2  # Width of sampling curve at low end
LAST_WIDTH = 40  # Width of sampling curve at high end
# Except for ROWS above, none of this is involved in the actual rendering
# of the graph, just how the data is massaged. If modifying this for your
# own FFT-based visualizer, you could keep this around and just change the
# drawing parts of the main loop.
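# For a rough sense of scale (illustrative values computed from the defaults
# above): the lowest row averages a window 2 bins wide centered on bin 1.5
# (FFT bins 0-2), while the highest row averages a window 40 bins wide
# centered on bin 111.5 (FFT bins 91 through the FFT_SIZE-1 clip), with the
# rows in between sliding smoothly between those extremes on a log2 scale.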


class AudioSpectrum(CM1):
    """Audio spectrum display for Little Connection Machine."""

    # pylint: disable=too-many-locals
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)  # CM1 base initialization

        # Access USB mic via PyAudio
        audio = pyaudio.PyAudio()
        self.stream = audio.open(
            format=pyaudio.paInt16,  # 16-bit int
            channels=1,              # Mono
            rate=RATE,
            input=True,
            output=False,
            frames_per_buffer=FFT_SIZE,
        )
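        # (This opens PyAudio's default capture device; the ALSA setup noted
        # in the module docstring is assumed to make the USB mic that default.)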

        # Precompute a few items for the math to follow
        first_center_log = math.log2(LEAST + 0.5)
        center_log_spread = math.log2(MOST + 0.5) - first_center_log
        width_low_log = math.log2(FIRST_WIDTH)
        width_log_spread = math.log2(LAST_WIDTH) - width_low_log

        # As mentioned earlier, each row of the graph is filtered down from
        # multiple FFT elements. These lists are involved in that filtering;
        # each has one item per row of output:
        self.low_bin = []     # First FFT bin that contributes to row
        self.bin_weight = []  # List of subsequent FFT element weightings
        self.bin_sum = []     # Precomputed sum of bin_weight for row
        self.noise = []       # Subtracted from FFT output (see note later)
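        # (Parallel lists, one entry per row: low_bin[row] plus an index into
        # bin_weight[row] identifies which FFT bin each weight applies to.)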

        for row in range(ROWS):  # For each row...
            # Calc center & spread of cubic curve for bin weighting
            center_log = first_center_log + center_log_spread * row / (ROWS - 1)
            center_linear = 2**center_log
            width_log = width_low_log + width_log_spread * row / (ROWS - 1)
            width_linear = 2**width_log
            half_width = width_linear * 0.5
            lower = center_linear - half_width
            upper = center_linear + half_width
            low_bin = int(lower)                    # First FFT element to use
            hi_bin = min(FFT_SIZE - 1, int(upper))  # Last "
            weights = []                            # FFT weights for row
            for bin_num in range(low_bin, hi_bin + 1):
                bin_center = bin_num + 0.5
                dist = abs(bin_center - center_linear) / half_width
                if dist < 1.0:  # Filter out math stragglers at either end
                    # Bin weights have a cubic falloff curve within range:
                    dist = 1.0 - dist  # Invert dist so 1.0 is at center
                    weight = ((3.0 - (dist * 2.0)) * dist) * dist
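                    # (This is the smoothstep polynomial 3d^2 - 2d^3: weight
                    # 1.0 at the window's center, easing to 0 at its edges.)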
                    weights.append(weight)
            self.bin_weight.append(weights)    # Save list of weights for row
            self.bin_sum.append(sum(weights))  # And sum of weights
            self.low_bin.append(low_bin)       # And first FFT bin index
            # FFT output always has a little "sparkle" due to ambient hum.
            # Subtracting a bit helps. Noise varies per element, more at low
            # end...this table is just a non-scientific fudge factor...
            self.noise.append(int(2.4 ** (4 - 4 * row / ROWS)))
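            # (Illustrative values: with ROWS = 32, this yields 33 for the
            # lowest row, tapering to 1 near the top.)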

    def run(self):
        """Main loop for audio visualizer."""

        # Some tables associated with each row of the display. These are
        # visualizer specific, not part of the FFT processing, so they're
        # here instead of part of the class above.
        width = [0 for _ in range(ROWS)]         # Current row width
        peak = [0 for _ in range(ROWS)]          # Recent row peak
        dropv = [0.0 for _ in range(ROWS)]       # Current peak falling speed
        autolevel = [32.0 for _ in range(ROWS)]  # Per-row auto adjust

        start_time = time.monotonic()
        frames = 0

        while True:

            # Read bytes from PyAudio stream, convert to int16, process
            # via NumPy's FFT function...
            data_8 = self.stream.read(FFT_SIZE * 2, exception_on_overflow=False)
            data_16 = np.frombuffer(data_8, np.int16)
            fft_out = np.fft.fft(data_16, norm="ortho")
            # fft_out will have FFT_SIZE * 2 elements, mirrored at center
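            # (The input is real, so the upper half mirrors the lower; only
            # the first FFT_SIZE bins are used below. np.fft.rfft is an
            # alternative that returns just the unique half.)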

            # Get spectrum of first half. Instead of square root for
            # magnitude, use something between square and cube root.
            # No scientific reason, just looked good.
            spec_y = [
                (c.real * c.real + c.imag * c.imag) ** 0.4 for c in fft_out[0:FFT_SIZE]
            ]
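            # (Each element equals abs(c) ** 0.8: the 0.4 exponent on power
            # sits between the square root (0.5) and cube root (1/3) of the
            # power spectrum, per the note above.)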

            self.clear()             # Clear canvas before drawing
            for row in range(ROWS):  # Low to high freq...
                # Weigh & sum up all the FFT outputs affecting this row
                total = 0
                for idx, weight in enumerate(self.bin_weight[row]):
                    total += (spec_y[self.low_bin[row] + idx]) * weight
                total /= self.bin_sum[row]

                # Auto-leveling is intended to make each column 'pop'.
                # When a particular column isn't getting a lot of input
                # from the FFT, gradually boost that column's sensitivity.
                if total > autolevel[row]:  # New level is louder
                    # Make autolevel rise quickly if column total exceeds it
                    autolevel[row] = autolevel[row] * 0.25 + total * 0.75
                else:  # New level is softer
                    # And fall slowly otherwise
                    autolevel[row] = autolevel[row] * 0.98 + total * 0.02
                # Autolevel limit keeps things from getting TOO boosty.
                # Trial and error, no science to this number.
                autolevel[row] = max(autolevel[row], 20)
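                # (Net effect: a fast-attack, slow-release envelope follower
                # per row, floored at 20 so near-silent rows aren't
                # over-boosted.)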

                # Apply autoleveling to weighted input.
                # This is the prelim. row width before further filtering...
                total *= 18 / autolevel[row]  # 18 is 1/2 display width

                # ...then filter the column width computed above
                if total > width[row]:
                    # If it's greater than this column's current width,
                    # move column's width quickly in that direction
                    width[row] = width[row] * 0.3 + total * 0.7
                else:
                    # If less, move slowly down
                    width[row] = width[row] * 0.5 + total * 0.5

                # Compute "peak dots," which sort of show the recent
                # peak level for each column (mostly just neat to watch).
                if width[row] > peak[row]:
                    # If column exceeds old peak, move peak immediately
                    # and give it a slight upward boost.
                    dropv[row] = (peak[row] - width[row]) * 0.07
                    peak[row] = min(width[row], 18)
                else:
                    # Otherwise, peak gradually accelerates down
                    dropv[row] += 0.2
                    peak[row] -= dropv[row]
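                # (Gravity-style motion: dropv is a per-frame velocity that
                # keeps growing while the dot falls back toward the bar.)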

                # Draw bar for this row. It's done as a gradient,
                # bright toward center, dim toward edge.
                iwidth = int(width[row] + 0.5)  # Integer width
                drow = ROWS - 1 - row           # Display row, reverse of freq row
                if iwidth > 0:
                    iwidth = min(iwidth, 18)                # Clip to 18 pixels
                    scale = self.brightness * iwidth / 18   # Center brightness
                    for col in range(iwidth):
                        level = int(scale * ((1.0 - col / iwidth) ** 2.6))
                        self.draw.point([17 - col, drow], fill=level)
                        self.draw.point([18 + col, drow], fill=level)
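                    # (Bars grow outward from center columns 17 and 18, so an
                    # 18-pixel half-width spans the full 36-pixel-wide canvas.)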

                # Draw peak dot
                if peak[row] > 0:
                    col = int(peak[row] + 0.5)
                    self.draw.point([17 - col, drow], fill=self.brightness)
                    self.draw.point([18 + col, drow], fill=self.brightness)

            # Update matrices and show est. frames/second
            self.redraw()
            frames += 1
            elapsed = time.monotonic() - start_time
            print(frames / elapsed)


if __name__ == "__main__":
    MY_APP = AudioSpectrum()  # Instantiate class, calls __init__() above
    MY_APP.process()