# Adafruit_Learning_System_Guides/LittleConnectionMachine/RaspberryPi/audio.py
# SPDX-FileCopyrightText: 2022 Phillip Burgess for Adafruit Industries
#
# SPDX-License-Identifier: MIT
"""
Audio spectrum display for Little Connection Machine. This is designed to be
fun to look at, not a Serious Audio Tool(tm). Requires USB microphone & ALSA
config. Prerequisite libraries include PyAudio and NumPy:
sudo apt-get install libatlas-base-dev libportaudio2
pip3 install numpy pyaudio
See the following for ALSA config (use Stretch directions):
learn.adafruit.com/usb-audio-cards-with-a-raspberry-pi/updating-alsa-config
"""
import math
import time
import numpy as np
import pyaudio
from cm1 import CM1
# FFT configurables. These numbers are 'hard,' actual figures:
RATE = 11025 # For audio vis, don't want or need high sample rate!
FFT_SIZE = 128 # Audio samples to read per frame (for FFT input)
ROWS = 32 # FFT output filtered down to this many 'buckets'
# Then things start getting subjective. For example, the lower and upper
# ends of the FFT output don't make a good contribution to the resulting
# graph...either too noisy, or out of musical range. Clip a range
# between 0 and FFT_SIZE-1. These aren't hard science, they were determined
# by playing various music and seeing what looked good:
LEAST = 1 # Lowest bin of FFT output to use
MOST = 111 # Highest bin of FFT output to use
# And moreso. Normally, FFT results are linearly spaced by frequency,
# and with music this results in a crowded low end and sparse high end.
# The visualizer reformats this logarithmically so octaves are linearly
# spaced...the low end is expanded, upper end compressed. But just picking
# individual FFT bins will cause visual dropouts. Instead, a number of
# inputs are merged into each output, and because of the logarithmic scale,
# that number needs to be focused near the low end and spread out among
# many samples toward the top. Again, not scientific, these were derived
# empirically by throwing music at it and adjusting:
FIRST_WIDTH = 2 # Width of sampling curve at low end
LAST_WIDTH = 40 # Width of sampling curve at high end
# Except for ROWS above, none of this is involved in the actual rendering
# of the graph, just how the data is massaged. If modifying this for your
# own FFT-based visualizer, you could keep this around and just change the
# drawing parts of the main loop.
class AudioSpectrum(CM1):
    """Audio spectrum display for Little Connection Machine.

    Reads mono 16-bit audio from the default input device, runs a small
    FFT, merges the result into ROWS logarithmically-spaced buckets, and
    renders each bucket as a mirrored horizontal bar (with a trailing
    "peak dot") on the CM1 display.
    """

    # pylint: disable=too-many-locals
    def __init__(self, *args, **kwargs):
        """Open the audio input stream and precompute the per-row FFT
        weighting tables. Positional/keyword args pass through to CM1."""
        super().__init__(*args, **kwargs)  # CM1 base initialization
        # Access USB mic via PyAudio
        audio = pyaudio.PyAudio()
        self.stream = audio.open(
            format=pyaudio.paInt16,  # 16-bit int
            channels=1,  # Mono
            rate=RATE,
            input=True,
            output=False,
            frames_per_buffer=FFT_SIZE,
        )
        # Precompute a few items for the math to follow
        first_center_log = math.log2(LEAST + 0.5)
        center_log_spread = math.log2(MOST + 0.5) - first_center_log
        width_low_log = math.log2(FIRST_WIDTH)
        width_log_spread = math.log2(LAST_WIDTH) - width_low_log
        # As mentioned earlier, each row of the graph is filtered down from
        # multiple FFT elements. These lists are involved in that filtering,
        # each has one item per row of output:
        self.low_bin = []  # First FFT bin that contributes to row
        self.bin_weight = []  # List of subsequent FFT element weightings
        self.bin_sum = []  # Precomputed sum of bin_weight for row
        self.noise = []  # Subtracted from each row's FFT total in run()
        for row in range(ROWS):  # For each row...
            # Calc center & spread of cubic curve for bin weighting.
            # Centers/widths are linear in log2 space, so each output row
            # covers an equal slice of octaves rather than of frequency.
            center_log = first_center_log + center_log_spread * row / (ROWS - 1)
            center_linear = 2**center_log
            width_log = width_low_log + width_log_spread * row / (ROWS - 1)
            width_linear = 2**width_log
            half_width = width_linear * 0.5
            lower = center_linear - half_width
            upper = center_linear + half_width
            low_bin = int(lower)  # First FFT element to use
            hi_bin = min(FFT_SIZE - 1, int(upper))  # Last "
            weights = []  # FFT weights for row
            for bin_num in range(low_bin, hi_bin + 1):
                bin_center = bin_num + 0.5
                dist = abs(bin_center - center_linear) / half_width
                if dist < 1.0:  # Filter out math stragglers at either end
                    # Bin weights have a cubic falloff curve within range:
                    dist = 1.0 - dist  # Invert dist so 1.0 is at center
                    weight = ((3.0 - (dist * 2.0)) * dist) * dist
                    weights.append(weight)
            self.bin_weight.append(weights)  # Save list of weights for row
            self.bin_sum.append(sum(weights))  # And sum of weights
            self.low_bin.append(low_bin)  # And first FFT bin index
            # FFT output always has a little "sparkle" due to ambient hum.
            # Subtracting a bit helps. Noise varies per element, more at low
            # end...this table is just a non-scientific fudge factor...
            self.noise.append(int(2.4 ** (4 - 4 * row / ROWS)))

    def run(self):
        """Main loop for audio visualizer. Runs until interrupted."""
        # Some tables associated with each row of the display. These are
        # visualizer specific, not part of the FFT processing, so they're
        # here instead of part of the class above.
        width = [0 for _ in range(ROWS)]  # Current row width
        peak = [0 for _ in range(ROWS)]  # Recent row peak
        dropv = [0.0 for _ in range(ROWS)]  # Current peak falling speed
        autolevel = [32.0 for _ in range(ROWS)]  # Per-row auto adjust
        start_time = time.monotonic()
        frames = 0
        while True:
            # Read bytes from PyAudio stream, convert to int16, process
            # via NumPy's FFT function...
            data_8 = self.stream.read(FFT_SIZE * 2, exception_on_overflow=False)
            data_16 = np.frombuffer(data_8, np.int16)
            fft_out = np.fft.fft(data_16, norm="ortho")
            # fft_out will have FFT_SIZE * 2 elements, mirrored at center.
            # Get spectrum of first half. Instead of square root for
            # magnitude, use something between square and cube root.
            # No scientific reason, just looked good.
            spec_y = [
                (c.real * c.real + c.imag * c.imag) ** 0.4 for c in fft_out[0:FFT_SIZE]
            ]
            self.clear()  # Clear canvas before drawing
            for row in range(ROWS):  # Low to high freq...
                # Weigh & sum up all the FFT outputs affecting this row
                total = 0
                for idx, weight in enumerate(self.bin_weight[row]):
                    total += (spec_y[self.low_bin[row] + idx]) * weight
                total /= self.bin_sum[row]
                # Subtract this row's ambient-noise fudge factor (table
                # precomputed in __init__), clamping at zero so quiet
                # rows can't go negative.
                total = max(0, total - self.noise[row])
                # Auto-leveling is intended to make each column 'pop'.
                # When a particular column isn't getting a lot of input
                # from the FFT, gradually boost that column's sensitivity.
                if total > autolevel[row]:  # New level is louder
                    # Make autolevel rise quickly if column total exceeds it
                    autolevel[row] = autolevel[row] * 0.25 + total * 0.75
                else:  # New level is softer
                    # And fall slowly otherwise
                    autolevel[row] = autolevel[row] * 0.98 + total * 0.02
                # Autolevel limit keeps things from getting TOO boosty.
                # Trial and error, no science to this number.
                autolevel[row] = max(autolevel[row], 20)
                # Apply autoleveling to weighted input.
                # This is the prelim. row width before further filtering...
                total *= 18 / autolevel[row]  # 18 is 1/2 display width
                # ...then filter the column width computed above
                if total > width[row]:
                    # If it's greater than this column's current width,
                    # move column's width quickly in that direction
                    width[row] = width[row] * 0.3 + total * 0.7
                else:
                    # If less, move slowly down
                    width[row] = width[row] * 0.5 + total * 0.5
                # Compute "peak dots," which sort of show the recent
                # peak level for each column (mostly just neat to watch).
                if width[row] > peak[row]:
                    # If column exceeds old peak, move peak immediately,
                    # give it a slight upward boost (negative fall speed).
                    dropv[row] = (peak[row] - width[row]) * 0.07
                    peak[row] = min(width[row], 18)
                else:
                    # Otherwise, peak gradually accelerates down
                    dropv[row] += 0.2
                    peak[row] -= dropv[row]
                # Draw bar for this row. It's done as a gradient,
                # bright toward center, dim toward edge.
                iwidth = int(width[row] + 0.5)  # Integer width
                drow = ROWS - 1 - row  # Display row, reverse of freq row
                if iwidth > 0:
                    iwidth = min(iwidth, 18)  # Clip to 18 pixels
                    scale = self.brightness * iwidth / 18  # Center brightness
                    for col in range(iwidth):
                        level = int(scale * ((1.0 - col / iwidth) ** 2.6))
                        self.draw.point([17 - col, drow], fill=level)
                        self.draw.point([18 + col, drow], fill=level)
                # Draw peak dot
                if peak[row] > 0:
                    col = int(peak[row] + 0.5)
                    self.draw.point([17 - col, drow], fill=self.brightness)
                    self.draw.point([18 + col, drow], fill=self.brightness)
            # Update matrices and show est. frames/second
            self.redraw()
            frames += 1
            elapsed = time.monotonic() - start_time
            print(frames / elapsed)
if __name__ == "__main__":
    # Construct the visualizer (opens the mic stream in __init__ above)
    # and hand control to the CM1 base class's process() entry point.
    AudioSpectrum().process()