Merge pull request #2515 from makermelissa/main
Magic Storybook: Added stripped out stuff from listener back in
This commit is contained in:
commit
e5a0806a9b
2 changed files with 68 additions and 19 deletions
|
|
@ -2,62 +2,109 @@
|
|||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
from queue import Queue
|
||||
import time
|
||||
|
||||
import speech_recognition as sr
|
||||
|
||||
|
||||
class Listener:
    """Captures phrases of speech from the microphone via speech_recognition.

    Audio is recorded on a background thread and handed to the main thread
    as raw byte chunks through a thread-safe Queue; `recognize()` sends the
    assembled phrase to the OpenAI Whisper API.
    """

    def __init__(
        self, api_key, energy_threshold=300, phrase_timeout=3.0, record_timeout=30
    ):
        """Set up the recognizer and calibrate the microphone.

        Args:
            api_key: OpenAI API key passed to `recognize_whisper_api`.
            energy_threshold: Mic energy level above which audio counts as speech.
            phrase_timeout: Seconds after which a phrase is considered complete.
            record_timeout: Maximum seconds of audio per background-captured chunk
                (passed as `phrase_time_limit` to `listen_in_background`).
        """
        # Stop-function returned by listen_in_background; None while idle.
        self.listener_handle = None
        self.microphone = sr.Microphone()
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = energy_threshold
        # Keep the threshold fixed after the one-time ambient calibration below.
        self.recognizer.dynamic_energy_threshold = False
        self.recognizer.pause_threshold = 1
        # Raw bytes accumulated for the phrase currently being assembled.
        self.last_sample = bytes()
        self.phrase_time = time.monotonic()
        self.phrase_timeout = phrase_timeout
        with self.microphone as source:
            self.recognizer.adjust_for_ambient_noise(
                source
            )  # we only need to calibrate once, before we start listening
        self.record_timeout = record_timeout
        self.phrase_complete = False
        # Raw audio chunks handed over from the background callback thread.
        self.data_queue = Queue()
        # NOTE(review): appears unused by the queue-based flow — possibly
        # vestigial; confirm before removing.
        self.audio = None
        self.api_key = api_key
||||
|
||||
def listen(self, ready_callback=None):
    """Block until one complete phrase of speech has been captured.

    Starts the background listener, calls *ready_callback* (if given) once
    listening has begun, then waits until speech is queued and the phrase
    has been marked complete, finally stopping the listener.
    """
    print("Start listening...")
    self.phrase_complete = False
    start = time.monotonic()
    self._start_listening()
    if ready_callback:
        ready_callback()
    # Spin until (a) the listener died, or (b) speech is waiting in the
    # queue AND the phrase has been marked complete.
    # NOTE(review): there is no sleep inside this loop, so it busy-waits at
    # full CPU until speech arrives — confirm this is acceptable on the
    # target hardware.
    while (
        self.listener_handle and not self.speech_waiting()
    ) or not self.phrase_complete:
        if self.phrase_time and time.monotonic() > start + self.phrase_timeout:
            # More than phrase_timeout seconds since listen() began: mark
            # the phrase complete and discard any partial sample.
            self.last_sample = bytes()
            self.phrase_complete = True
        # phrase_time holds seconds elapsed since listen() began; it is
        # also used as a truthiness flag in the check above.
        self.phrase_time = time.monotonic() - start
    self.stop_listening()
|
||||
|
||||
def _save_audio_callback(self, _recognizer, audio):
|
||||
self.audio = audio
|
||||
def _save_audio_callback(self, _, audio):
|
||||
print("Saving audio")
|
||||
data = audio.get_raw_data()
|
||||
self.data_queue.put(data)
|
||||
|
||||
def _get_audio(self):
    """Concatenate and convert the queued raw data back to audio and return it

    Returns an ``sr.AudioData`` built from all queued chunks appended to
    ``last_sample``, or None when nothing is waiting in the queue.
    """
    start = time.monotonic()
    if self.speech_waiting():
        self.phrase_complete = False
        # NOTE(review): `start` was taken just above, so this elapsed-time
        # check is effectively always False at this point — confirm whether
        # it was meant to compare against the previous phrase's timestamp.
        if self.phrase_time and time.monotonic() > start + self.phrase_timeout:
            self.last_sample = bytes()
            self.phrase_complete = True
        self.phrase_time = time.monotonic() - start

        # Concatenate our current audio data with the latest audio data.
        while self.speech_waiting():
            data = self.data_queue.get()
            self.last_sample += data

        # Use AudioData to convert the raw data to wav data.
        return sr.AudioData(
            self.last_sample,
            self.microphone.SAMPLE_RATE,
            self.microphone.SAMPLE_WIDTH,
        )
    return None
|
||||
|
||||
def _start_listening(self):
|
||||
self.listener_handle = self.recognizer.listen_in_background(
|
||||
self.microphone, self._save_audio_callback
|
||||
)
|
||||
if not self.listener_handle:
|
||||
self.listener_handle = self.recognizer.listen_in_background(
|
||||
self.microphone,
|
||||
self._save_audio_callback,
|
||||
phrase_time_limit=self.record_timeout,
|
||||
)
|
||||
|
||||
def stop_listening(self, wait_for_stop=False):
|
||||
if self.listener_handle:
|
||||
self.listener_handle(wait_for_stop=wait_for_stop)
|
||||
self.listener_handle = None
|
||||
print("Stop listening...")
|
||||
|
||||
def is_listening(self):
|
||||
return self.listener_handle is not None
|
||||
|
||||
def speech_waiting(self):
|
||||
return self.audio is not None
|
||||
return not self.data_queue.empty()
|
||||
|
||||
def recognize(self):
|
||||
if self.audio:
|
||||
audio = self._get_audio()
|
||||
if audio:
|
||||
# Transcribe the audio data to text using Whisper
|
||||
print("Recognizing...")
|
||||
attempts = 0
|
||||
while attempts < 3:
|
||||
try:
|
||||
result = self.recognizer.recognize_whisper_api(
|
||||
self.audio, api_key=self.api_key
|
||||
audio, api_key=self.api_key
|
||||
)
|
||||
|
||||
return result.strip()
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ TITLE_FADE_TIME = 0.05
|
|||
TITLE_FADE_STEPS = 25
|
||||
TEXT_FADE_TIME = 0.25
|
||||
TEXT_FADE_STEPS = 51
|
||||
ALSA_ERROR_DELAY = 1.0 # Delay to wait after an ALSA errors
|
||||
ALSA_ERROR_DELAY = 0.5 # Delay to wait after an ALSA errors
|
||||
|
||||
# Whitespace Settings (in Pixels)
|
||||
PAGE_TOP_MARGIN = 20
|
||||
|
|
@ -92,6 +92,7 @@ WHISPER_MODEL = "whisper-1"
|
|||
|
||||
# Speech Recognition Parameters
|
||||
ENERGY_THRESHOLD = 300 # Energy level for mic to detect
|
||||
PHRASE_TIMEOUT = 1.0 # Space between recordings for separating phrases
|
||||
RECORD_TIMEOUT = 30 # Maximum time in seconds to wait for speech
|
||||
|
||||
# Do some checks and Import API keys from API_KEYS_FILE
|
||||
|
|
@ -248,7 +249,9 @@ class Book:
|
|||
self._prompt = f.read()
|
||||
|
||||
# Initialize the Listener
|
||||
self.listener = Listener(openai.api_key, ENERGY_THRESHOLD, RECORD_TIMEOUT)
|
||||
self.listener = Listener(
|
||||
openai.api_key, ENERGY_THRESHOLD, PHRASE_TIMEOUT, RECORD_TIMEOUT
|
||||
)
|
||||
|
||||
# Preload remaining images
|
||||
self._load_image("background", BACKGROUND_IMAGE)
|
||||
|
|
@ -636,17 +639,16 @@ class Book:
|
|||
if self._sleep_request:
|
||||
self._busy = False
|
||||
time.sleep(0.2)
|
||||
print("Not busy anymore")
|
||||
return
|
||||
|
||||
def show_waiting():
|
||||
def show_listening():
|
||||
# Pause for a beat because the listener doesn't
|
||||
# immediately start listening sometimes
|
||||
time.sleep(ALSA_ERROR_DELAY)
|
||||
self.pixels.fill(NEOPIXEL_WAITING_COLOR)
|
||||
self.pixels.show()
|
||||
|
||||
self.listener.listen(ready_callback=show_waiting)
|
||||
self.listener.listen(ready_callback=show_listening)
|
||||
|
||||
if self._sleep_request:
|
||||
self._busy = False
|
||||
|
|
@ -654,10 +656,11 @@ class Book:
|
|||
|
||||
if not self.listener.speech_waiting():
|
||||
# No response from user, so return
|
||||
print("No response from user.")
|
||||
return
|
||||
|
||||
story_request = self.listener.recognize()
|
||||
|
||||
print(f"Whisper heard: {story_request}")
|
||||
story_prompt = self._make_story_prompt(story_request)
|
||||
self.display_loading()
|
||||
response = self._sendchat(story_prompt)
|
||||
|
|
@ -680,7 +683,6 @@ class Book:
|
|||
if self.listener.is_listening():
|
||||
self.listener.stop_listening()
|
||||
while self._busy:
|
||||
print("Still busy")
|
||||
time.sleep(0.1)
|
||||
self._sleep_request = False
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue