Update Magic AI Storybook for Bookworm and new OpenAI API

This commit is contained in:
Melissa LeBlanc-Williams 2024-03-29 11:19:20 -07:00
parent 5047c10377
commit c354179e0c
3 changed files with 28 additions and 54 deletions

View file

@@ -2,15 +2,13 @@
#
# SPDX-License-Identifier: MIT
from queue import Queue
import time
import speech_recognition as sr
class Listener:
def __init__(
self, api_key, energy_threshold=300, phrase_timeout=3.0, record_timeout=30
self, api_key, energy_threshold=300, record_timeout=30
):
self.listener_handle = None
self.microphone = sr.Microphone()
@@ -18,62 +16,31 @@ class Listener:
self.recognizer.energy_threshold = energy_threshold
self.recognizer.dynamic_energy_threshold = False
self.recognizer.pause_threshold = 1
self.last_sample = bytes()
self.phrase_time = time.monotonic()
self.phrase_timeout = phrase_timeout
with self.microphone as source:
self.recognizer.adjust_for_ambient_noise(
source
) # we only need to calibrate once, before we start listening
self.record_timeout = record_timeout
self.phrase_complete = False
self.data_queue = Queue()
self._audio = None
self.listener_handle = None
self.api_key = api_key
def listen(self, ready_callback=None):
print("Start listening...")
self.phrase_complete = False
start = time.monotonic()
self._start_listening()
if ready_callback:
ready_callback()
while (
self.listener_handle and not self.speech_waiting()
) or not self.phrase_complete:
if self.phrase_time and time.monotonic() > start + self.phrase_timeout:
self.last_sample = bytes()
self.phrase_complete = True
self.phrase_time = time.monotonic() - start
):
time.sleep(0.1)
self.stop_listening()
def _save_audio_callback(self, _, audio):
print("Saving audio")
data = audio.get_raw_data()
self.data_queue.put(data)
def _get_audio(self):
"""Concatenate and convert the queued raw data back to audio and return it"""
start = time.monotonic()
if self.speech_waiting():
self.phrase_complete = False
if self.phrase_time and time.monotonic() > start + self.phrase_timeout:
self.last_sample = bytes()
self.phrase_complete = True
self.phrase_time = time.monotonic() - start
# Concatenate our current audio data with the latest audio data.
while self.speech_waiting():
data = self.data_queue.get()
self.last_sample += data
# Use AudioData to convert the raw data to wav data.
return sr.AudioData(
self.last_sample,
self.microphone.SAMPLE_RATE,
self.microphone.SAMPLE_WIDTH,
)
return None
self._audio = audio
def _start_listening(self):
if not self.listener_handle:
@@ -93,20 +60,19 @@ class Listener:
return self.listener_handle is not None
def speech_waiting(self):
return not self.data_queue.empty()
return self._audio is not None
def recognize(self):
audio = self._get_audio()
if audio:
if self._audio:
# Transcribe the audio data to text using Whisper
print("Recognizing...")
attempts = 0
while attempts < 3:
try:
result = self.recognizer.recognize_whisper_api(
audio, api_key=self.api_key
self._audio, api_key=self.api_key
)
self._audio = None
return result.strip()
except sr.RequestError as e:
print(f"Error: {e}")

View file

@@ -28,6 +28,7 @@ def main():
APP_PATH = "~/Magic_AI_Storybook/story.py"
APP_ICON = "~/Magic_AI_Storybook/images/magic_book_icon.png"
FILENAME = "storybook.desktop"
ENV_PATH = "~/story"
AUTO_START = True
if os.geteuid() == 0:
@@ -41,12 +42,16 @@ def main():
APP_PATH = APP_PATH.replace("~", user_homedir)
APP_ICON = APP_ICON.replace("~", user_homedir)
PYTHON_PATH = "python"
if ENV_PATH is not None:
ENV_PATH = ENV_PATH.replace("~", user_homedir)
PYTHON_PATH = ENV_PATH + "/bin/" + PYTHON_PATH
shortcut_template = f"""[Desktop Entry]
Comment=Run {APP_TITLE}
Terminal={"true" if RUN_IN_TERMINAL else "false"}
Name={APP_TITLE}
Exec=sudo python {APP_PATH}
Exec=sudo -E env PATH=$PATH {PYTHON_PATH} {APP_PATH}
Type=Application
Icon={APP_ICON}
"""

View file

@@ -16,7 +16,7 @@ from collections import deque
import board
import digitalio
import neopixel
import openai
from openai import OpenAI
import pygame
from rpi_backlight import Backlight
from adafruit_led_animation.animation.pulse import Pulse
@@ -87,12 +87,11 @@ PARAGRAPH_SPACING = 30
# ChatGPT Parameters
SYSTEM_ROLE = "You are a master AI Storyteller that can tell a story of any length."
CHATGPT_MODEL = "gpt-3.5-turbo"
CHATGPT_MODEL = "gpt-3.5-turbo" # You can also use "gpt-4", which is slower, but more accurate
WHISPER_MODEL = "whisper-1"
# Speech Recognition Parameters
ENERGY_THRESHOLD = 300 # Energy level for mic to detect
PHRASE_TIMEOUT = 1.0 # Space between recordings for separating phrases
RECORD_TIMEOUT = 30 # Maximum time in seconds to wait for speech
# Do some checks and Import API keys from API_KEYS_FILE
@ -118,7 +117,10 @@ if "OPENAI_API_KEY" not in config["openai"]:
if len(config["openai"]["OPENAI_API_KEY"]) < 10:
print("Please set OPENAI_API_KEY in your API keys file with a valid key.")
sys.exit(1)
openai.api_key = config["openai"]["OPENAI_API_KEY"]
openai = OpenAI(
# This is the default and can be omitted
api_key=config["openai"]["OPENAI_API_KEY"],
)
# Check that the prompt file exists and load it
if not os.path.isfile(PROMPT_FILE):
@@ -250,7 +252,7 @@ class Book:
# Initialize the Listener
self.listener = Listener(
openai.api_key, ENERGY_THRESHOLD, PHRASE_TIMEOUT, RECORD_TIMEOUT
openai.api_key, ENERGY_THRESHOLD, RECORD_TIMEOUT
)
# Preload remaining images
@@ -728,8 +730,9 @@ class Book:
def _sendchat(self, prompt):
response = ""
print("Sending to chatGPT")
print("Prompt: ", prompt)
# Package up the text to send to ChatGPT
completion = openai.ChatCompletion.create(
stream = openai.chat.completions.create(
model=CHATGPT_MODEL,
messages=[
{"role": "system", "content": SYSTEM_ROLE},
@@ -738,9 +741,9 @@ class Book:
stream=True,
)
for chunk in completion:
if "delta" in chunk.choices[0] and "content" in chunk.choices[0]["delta"]:
response += chunk.choices[0]["delta"]["content"]
for chunk in stream:
if chunk.choices[0].delta.content is not None:
response += chunk.choices[0].delta.content
if self._sleep_request:
return None