Update Magic AI Storybook for Bookworm and new OpenAI API

This commit is contained in:
Melissa LeBlanc-Williams 2024-03-29 11:19:20 -07:00
parent 5047c10377
commit c354179e0c
3 changed files with 28 additions and 54 deletions

View file

@@ -2,15 +2,13 @@
#
# SPDX-License-Identifier: MIT
from queue import Queue
import time
import speech_recognition as sr
class Listener:
def __init__(
self, api_key, energy_threshold=300, phrase_timeout=3.0, record_timeout=30
self, api_key, energy_threshold=300, record_timeout=30
):
self.listener_handle = None
self.microphone = sr.Microphone()
@@ -18,62 +16,31 @@ class Listener:
self.recognizer.energy_threshold = energy_threshold
self.recognizer.dynamic_energy_threshold = False
self.recognizer.pause_threshold = 1
self.last_sample = bytes()
self.phrase_time = time.monotonic()
self.phrase_timeout = phrase_timeout
with self.microphone as source:
self.recognizer.adjust_for_ambient_noise(
source
) # we only need to calibrate once, before we start listening
self.record_timeout = record_timeout
self.phrase_complete = False
self.data_queue = Queue()
self._audio = None
self.listener_handle = None
self.api_key = api_key
def listen(self, ready_callback=None):
print("Start listening...")
self.phrase_complete = False
start = time.monotonic()
self._start_listening()
if ready_callback:
ready_callback()
while (
self.listener_handle and not self.speech_waiting()
) or not self.phrase_complete:
if self.phrase_time and time.monotonic() > start + self.phrase_timeout:
self.last_sample = bytes()
self.phrase_complete = True
self.phrase_time = time.monotonic() - start
):
time.sleep(0.1)
self.stop_listening()
def _save_audio_callback(self, _, audio):
print("Saving audio")
data = audio.get_raw_data()
self.data_queue.put(data)
def _get_audio(self):
"""Concatenate and convert the queued raw data back to audio and return it"""
start = time.monotonic()
if self.speech_waiting():
self.phrase_complete = False
if self.phrase_time and time.monotonic() > start + self.phrase_timeout:
self.last_sample = bytes()
self.phrase_complete = True
self.phrase_time = time.monotonic() - start
# Concatenate our current audio data with the latest audio data.
while self.speech_waiting():
data = self.data_queue.get()
self.last_sample += data
# Use AudioData to convert the raw data to wav data.
return sr.AudioData(
self.last_sample,
self.microphone.SAMPLE_RATE,
self.microphone.SAMPLE_WIDTH,
)
return None
self._audio = audio
def _start_listening(self):
if not self.listener_handle:
@@ -93,20 +60,19 @@ class Listener:
return self.listener_handle is not None
def speech_waiting(self):
return not self.data_queue.empty()
return self._audio is not None
def recognize(self):
audio = self._get_audio()
if audio:
if self._audio:
# Transcribe the audio data to text using Whisper
print("Recognizing...")
attempts = 0
while attempts < 3:
try:
result = self.recognizer.recognize_whisper_api(
audio, api_key=self.api_key
self._audio, api_key=self.api_key
)
self._audio = None
return result.strip()
except sr.RequestError as e:
print(f"Error: {e}")

View file

@@ -28,6 +28,7 @@ def main():
APP_PATH = "~/Magic_AI_Storybook/story.py"
APP_ICON = "~/Magic_AI_Storybook/images/magic_book_icon.png"
FILENAME = "storybook.desktop"
ENV_PATH = "~/story"
AUTO_START = True
if os.geteuid() == 0:
@@ -41,12 +42,16 @@ def main():
APP_PATH = APP_PATH.replace("~", user_homedir)
APP_ICON = APP_ICON.replace("~", user_homedir)
PYTHON_PATH = "python"
if ENV_PATH is not None:
ENV_PATH = ENV_PATH.replace("~", user_homedir)
PYTHON_PATH = ENV_PATH + "/bin/" + PYTHON_PATH
shortcut_template = f"""[Desktop Entry]
Comment=Run {APP_TITLE}
Terminal={"true" if RUN_IN_TERMINAL else "false"}
Name={APP_TITLE}
Exec=sudo python {APP_PATH}
Exec=sudo -E env PATH=$PATH {PYTHON_PATH} {APP_PATH}
Type=Application
Icon={APP_ICON}
"""

View file

@@ -16,7 +16,7 @@ from collections import deque
import board
import digitalio
import neopixel
import openai
from openai import OpenAI
import pygame
from rpi_backlight import Backlight
from adafruit_led_animation.animation.pulse import Pulse
@@ -87,12 +87,11 @@ PARAGRAPH_SPACING = 30
# ChatGPT Parameters
SYSTEM_ROLE = "You are a master AI Storyteller that can tell a story of any length."
CHATGPT_MODEL = "gpt-3.5-turbo"
CHATGPT_MODEL = "gpt-3.5-turbo" # You can also use "gpt-4", which is slower, but more accurate
WHISPER_MODEL = "whisper-1"
# Speech Recognition Parameters
ENERGY_THRESHOLD = 300 # Energy level for mic to detect
PHRASE_TIMEOUT = 1.0 # Space between recordings for separating phrases
RECORD_TIMEOUT = 30 # Maximum time in seconds to wait for speech
# Do some checks and Import API keys from API_KEYS_FILE
@ -118,7 +117,10 @@ if "OPENAI_API_KEY" not in config["openai"]:
if len(config["openai"]["OPENAI_API_KEY"]) < 10:
print("Please set OPENAI_API_KEY in your API keys file with a valid key.")
sys.exit(1)
openai.api_key = config["openai"]["OPENAI_API_KEY"]
openai = OpenAI(
# This is the default and can be omitted
api_key=config["openai"]["OPENAI_API_KEY"],
)
# Check that the prompt file exists and load it
if not os.path.isfile(PROMPT_FILE):
@@ -250,7 +252,7 @@ class Book:
# Initialize the Listener
self.listener = Listener(
openai.api_key, ENERGY_THRESHOLD, PHRASE_TIMEOUT, RECORD_TIMEOUT
openai.api_key, ENERGY_THRESHOLD, RECORD_TIMEOUT
)
# Preload remaining images
@@ -728,8 +730,9 @@ class Book:
def _sendchat(self, prompt):
response = ""
print("Sending to chatGPT")
print("Prompt: ", prompt)
# Package up the text to send to ChatGPT
completion = openai.ChatCompletion.create(
stream = openai.chat.completions.create(
model=CHATGPT_MODEL,
messages=[
{"role": "system", "content": SYSTEM_ROLE},
@@ -738,9 +741,9 @@ class Book:
stream=True,
)
for chunk in completion:
if "delta" in chunk.choices[0] and "content" in chunk.choices[0]["delta"]:
response += chunk.choices[0]["delta"]["content"]
for chunk in stream:
if chunk.choices[0].delta.content is not None:
response += chunk.choices[0].delta.content
if self._sleep_request:
return None