Update Magic AI Storybook for Bookworm and new OpenAI API

This commit is contained in:
Melissa LeBlanc-Williams 2024-03-29 11:19:20 -07:00
parent 5047c10377
commit c354179e0c
3 changed files with 28 additions and 54 deletions

View file

@ -2,15 +2,13 @@
# #
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
from queue import Queue
import time import time
import speech_recognition as sr import speech_recognition as sr
class Listener: class Listener:
def __init__( def __init__(
self, api_key, energy_threshold=300, phrase_timeout=3.0, record_timeout=30 self, api_key, energy_threshold=300, record_timeout=30
): ):
self.listener_handle = None self.listener_handle = None
self.microphone = sr.Microphone() self.microphone = sr.Microphone()
@ -18,62 +16,31 @@ class Listener:
self.recognizer.energy_threshold = energy_threshold self.recognizer.energy_threshold = energy_threshold
self.recognizer.dynamic_energy_threshold = False self.recognizer.dynamic_energy_threshold = False
self.recognizer.pause_threshold = 1 self.recognizer.pause_threshold = 1
self.last_sample = bytes()
self.phrase_time = time.monotonic() self.phrase_time = time.monotonic()
self.phrase_timeout = phrase_timeout
with self.microphone as source: with self.microphone as source:
self.recognizer.adjust_for_ambient_noise( self.recognizer.adjust_for_ambient_noise(
source source
) # we only need to calibrate once, before we start listening ) # we only need to calibrate once, before we start listening
self.record_timeout = record_timeout self.record_timeout = record_timeout
self.phrase_complete = False self._audio = None
self.data_queue = Queue()
self.listener_handle = None self.listener_handle = None
self.api_key = api_key self.api_key = api_key
def listen(self, ready_callback=None): def listen(self, ready_callback=None):
print("Start listening...") print("Start listening...")
self.phrase_complete = False
start = time.monotonic()
self._start_listening() self._start_listening()
if ready_callback: if ready_callback:
ready_callback() ready_callback()
while ( while (
self.listener_handle and not self.speech_waiting() self.listener_handle and not self.speech_waiting()
) or not self.phrase_complete: ):
if self.phrase_time and time.monotonic() > start + self.phrase_timeout: time.sleep(0.1)
self.last_sample = bytes()
self.phrase_complete = True
self.phrase_time = time.monotonic() - start
self.stop_listening() self.stop_listening()
def _save_audio_callback(self, _, audio): def _save_audio_callback(self, _, audio):
print("Saving audio") print("Saving audio")
data = audio.get_raw_data() self._audio = audio
self.data_queue.put(data)
def _get_audio(self):
"""Concatenate and convert the queued raw data back to audio and return it"""
start = time.monotonic()
if self.speech_waiting():
self.phrase_complete = False
if self.phrase_time and time.monotonic() > start + self.phrase_timeout:
self.last_sample = bytes()
self.phrase_complete = True
self.phrase_time = time.monotonic() - start
# Concatenate our current audio data with the latest audio data.
while self.speech_waiting():
data = self.data_queue.get()
self.last_sample += data
# Use AudioData to convert the raw data to wav data.
return sr.AudioData(
self.last_sample,
self.microphone.SAMPLE_RATE,
self.microphone.SAMPLE_WIDTH,
)
return None
def _start_listening(self): def _start_listening(self):
if not self.listener_handle: if not self.listener_handle:
@ -93,20 +60,19 @@ class Listener:
return self.listener_handle is not None return self.listener_handle is not None
def speech_waiting(self): def speech_waiting(self):
return not self.data_queue.empty() return self._audio is not None
def recognize(self): def recognize(self):
audio = self._get_audio() if self._audio:
if audio:
# Transcribe the audio data to text using Whisper # Transcribe the audio data to text using Whisper
print("Recognizing...") print("Recognizing...")
attempts = 0 attempts = 0
while attempts < 3: while attempts < 3:
try: try:
result = self.recognizer.recognize_whisper_api( result = self.recognizer.recognize_whisper_api(
audio, api_key=self.api_key self._audio, api_key=self.api_key
) )
self._audio = None
return result.strip() return result.strip()
except sr.RequestError as e: except sr.RequestError as e:
print(f"Error: {e}") print(f"Error: {e}")

View file

@ -28,6 +28,7 @@ def main():
APP_PATH = "~/Magic_AI_Storybook/story.py" APP_PATH = "~/Magic_AI_Storybook/story.py"
APP_ICON = "~/Magic_AI_Storybook/images/magic_book_icon.png" APP_ICON = "~/Magic_AI_Storybook/images/magic_book_icon.png"
FILENAME = "storybook.desktop" FILENAME = "storybook.desktop"
ENV_PATH = "~/story"
AUTO_START = True AUTO_START = True
if os.geteuid() == 0: if os.geteuid() == 0:
@ -41,12 +42,16 @@ def main():
APP_PATH = APP_PATH.replace("~", user_homedir) APP_PATH = APP_PATH.replace("~", user_homedir)
APP_ICON = APP_ICON.replace("~", user_homedir) APP_ICON = APP_ICON.replace("~", user_homedir)
PYTHON_PATH = "python"
if ENV_PATH is not None:
ENV_PATH = ENV_PATH.replace("~", user_homedir)
PYTHON_PATH = ENV_PATH + "/bin/" + PYTHON_PATH
shortcut_template = f"""[Desktop Entry] shortcut_template = f"""[Desktop Entry]
Comment=Run {APP_TITLE} Comment=Run {APP_TITLE}
Terminal={"true" if RUN_IN_TERMINAL else "false"} Terminal={"true" if RUN_IN_TERMINAL else "false"}
Name={APP_TITLE} Name={APP_TITLE}
Exec=sudo python {APP_PATH} Exec=sudo -E env PATH=$PATH {PYTHON_PATH} {APP_PATH}
Type=Application Type=Application
Icon={APP_ICON} Icon={APP_ICON}
""" """

View file

@ -16,7 +16,7 @@ from collections import deque
import board import board
import digitalio import digitalio
import neopixel import neopixel
import openai from openai import OpenAI
import pygame import pygame
from rpi_backlight import Backlight from rpi_backlight import Backlight
from adafruit_led_animation.animation.pulse import Pulse from adafruit_led_animation.animation.pulse import Pulse
@ -87,12 +87,11 @@ PARAGRAPH_SPACING = 30
# ChatGPT Parameters # ChatGPT Parameters
SYSTEM_ROLE = "You are a master AI Storyteller that can tell a story of any length." SYSTEM_ROLE = "You are a master AI Storyteller that can tell a story of any length."
CHATGPT_MODEL = "gpt-3.5-turbo" CHATGPT_MODEL = "gpt-3.5-turbo" # You can also use "gpt-4", which is slower, but more accurate
WHISPER_MODEL = "whisper-1" WHISPER_MODEL = "whisper-1"
# Speech Recognition Parameters # Speech Recognition Parameters
ENERGY_THRESHOLD = 300 # Energy level for mic to detect ENERGY_THRESHOLD = 300 # Energy level for mic to detect
PHRASE_TIMEOUT = 1.0 # Space between recordings for separating phrases
RECORD_TIMEOUT = 30 # Maximum time in seconds to wait for speech RECORD_TIMEOUT = 30 # Maximum time in seconds to wait for speech
# Do some checks and Import API keys from API_KEYS_FILE # Do some checks and Import API keys from API_KEYS_FILE
@ -118,7 +117,10 @@ if "OPENAI_API_KEY" not in config["openai"]:
if len(config["openai"]["OPENAI_API_KEY"]) < 10: if len(config["openai"]["OPENAI_API_KEY"]) < 10:
print("Please set OPENAI_API_KEY in your API keys file with a valid key.") print("Please set OPENAI_API_KEY in your API keys file with a valid key.")
sys.exit(1) sys.exit(1)
openai.api_key = config["openai"]["OPENAI_API_KEY"] openai = OpenAI(
# This is the default and can be omitted
api_key=config["openai"]["OPENAI_API_KEY"],
)
# Check that the prompt file exists and load it # Check that the prompt file exists and load it
if not os.path.isfile(PROMPT_FILE): if not os.path.isfile(PROMPT_FILE):
@ -250,7 +252,7 @@ class Book:
# Initialize the Listener # Initialize the Listener
self.listener = Listener( self.listener = Listener(
openai.api_key, ENERGY_THRESHOLD, PHRASE_TIMEOUT, RECORD_TIMEOUT openai.api_key, ENERGY_THRESHOLD, RECORD_TIMEOUT
) )
# Preload remaining images # Preload remaining images
@ -728,8 +730,9 @@ class Book:
def _sendchat(self, prompt): def _sendchat(self, prompt):
response = "" response = ""
print("Sending to chatGPT") print("Sending to chatGPT")
print("Prompt: ", prompt)
# Package up the text to send to ChatGPT # Package up the text to send to ChatGPT
completion = openai.ChatCompletion.create( stream = openai.chat.completions.create(
model=CHATGPT_MODEL, model=CHATGPT_MODEL,
messages=[ messages=[
{"role": "system", "content": SYSTEM_ROLE}, {"role": "system", "content": SYSTEM_ROLE},
@ -738,9 +741,9 @@ class Book:
stream=True, stream=True,
) )
for chunk in completion: for chunk in stream:
if "delta" in chunk.choices[0] and "content" in chunk.choices[0]["delta"]: if chunk.choices[0].delta.content is not None:
response += chunk.choices[0]["delta"]["content"] response += chunk.choices[0].delta.content
if self._sleep_request: if self._sleep_request:
return None return None