Merge pull request #14 from jepler/huggingface

Add huggingface back-end
2023-09-29 10:18:58 -05:00 · 2023-09-29 10:18:58 -05:00 · f3bf17ca2f
commit f3bf17ca2f
parent 9919c9a229 b6fa44f53e
5 changed files with 164 additions and 11 deletions
--- a/src/chap/backends/huggingface.py
+++ b/src/chap/backends/huggingface.py
@ -0,0 +1,117 @@
+# SPDX-FileCopyrightText: 2023 Jeff Epler <jepler@gmail.com>
+#
+# SPDX-License-Identifier: MIT
+
+import asyncio
+import json
+from dataclasses import dataclass
+
+import httpx
+
+from ..key import get_key
+from ..session import Assistant, User
+
+
+class HuggingFace:
+    """Chat back-end that streams completions from a Hugging Face endpoint.
+
+    A conversation is flattened into one prompt string using the delimiter
+    strings held in ``Parameters``, POSTed to ``{url}/models/{model}``, and the
+    generated text is yielded piece by piece as it arrives.
+    """
+
+    @dataclass
+    class Parameters:
+        # Base URL of the inference API; "/models/<model>" is appended to it.
+        url: str = "https://api-inference.huggingface.co"
+        # Model identifier used as the last part of the request URL.
+        model: str = "mistralai/Mistral-7B-Instruct-v0.1"
+        # NOTE(review): declared but never read by this class as shown --
+        # presumably meant to be sent along with the request; confirm.
+        max_new_tokens: int = 250
+        # Text emitted once at the very start of every prompt.
+        start_prompt: str = """<s>[INST] <<SYS>>\n"""
+        # Text emitted right after a system / user / assistant message.
+        after_system: str = "\n<</SYS>>\n\n"
+        after_user: str = """ [/INST] """
+        after_assistant: str = """ </s><s>[INST] """
+        # Streaming stops when a token with this id is received.  It carries
+        # no annotation, so it is a plain class attribute, not a dataclass
+        # field.
+        stop_token_id = 2
+
+    def __init__(self):
+        self.parameters = self.Parameters()
+
+    # Default system prompt text.  NOTE(review): not referenced anywhere
+    # inside this class as shown; presumably read by the caller -- confirm.
+    system_message = """\
+A dialog, where USER interacts with AI. AI is helpful, kind, obedient, honest, and knows its own limits.
+"""
+
+    def make_full_query(self, messages, max_query_size):
+        """Flatten ``messages`` into a single prompt string.
+
+        The list is trimmed in place so that only its first entry and its
+        last ``max_query_size`` entries remain.  Messages whose content is
+        empty after stripping are skipped; every other message contributes
+        its stripped content followed by the delimiter configured for its
+        role (nothing is appended for an unrecognised role).
+        """
+        del messages[1:-max_query_size]
+        result = [self.parameters.start_prompt]
+        for m in messages:
+            content = (m.content or "").strip()
+            if not content:
+                continue
+            result.append(content)
+            if m.role == "system":
+                result.append(self.parameters.after_system)
+            elif m.role == "assistant":
+                result.append(self.parameters.after_assistant)
+            elif m.role == "user":
+                result.append(self.parameters.after_user)
+        full_query = "".join(result)
+        return full_query
+
+    async def chained_query(self, inputs, timeout):
+        """Async generator yielding generated text for the prompt ``inputs``.
+
+        Each ``data:`` line of the streamed response is parsed as JSON and the
+        text of its ``token`` entry is yielded.  The generator ends when a
+        token whose id equals ``stop_token_id`` arrives.  ``inputs`` is
+        refreshed from every message's ``generated_text``; whatever value it
+        holds once a stream is exhausted decides whether a follow-up request
+        is made.  On a non-200 status one failure string is yielded and the
+        generator ends.
+        """
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            while inputs:
+                params = {
+                    "inputs": inputs,
+                    "stream": True,
+                }
+                # Cleared so the loop ends unless the stream supplies new text.
+                inputs = None
+                async with client.stream(
+                    "POST",
+                    f"{self.parameters.url}/models/{self.parameters.model}",
+                    json=params,
+                    headers={
+                        "Authorization": f"Bearer {self.get_key()}",
+                    },
+                ) as response:
+                    if response.status_code == 200:
+                        async for line in response.aiter_lines():
+                            if line.startswith("data:"):
+                                data = line.removeprefix("data:").strip()
+                                j = json.loads(data)
+                                token = j.get("token", {})
+                                inputs = j.get("generated_text", inputs)
+                                if token.get("id") == self.parameters.stop_token_id:
+                                    return
+                                yield token.get("text", "")
+                    else:
+                        yield f"\nFailed with {response=!r}"
+                        return
+
+    async def aask(
+        self, session, query, *, max_query_size=5, timeout=180
+    ):  # pylint: disable=unused-argument,too-many-locals,too-many-branches
+        """Async generator: send ``query`` and yield the reply as it streams.
+
+        Leading whitespace is stripped until the first non-empty piece of
+        text has been produced.  An ``httpx.HTTPError`` is reported as a
+        final piece of text instead of being raised.  Once the generator has
+        run to completion, the user query and the joined reply are appended
+        to ``session.session``.
+        """
+        new_content = []
+        inputs = self.make_full_query(session.session + [User(query)], max_query_size)
+        try:
+            async for content in self.chained_query(inputs, timeout=timeout):
+                # Nothing emitted yet: drop whitespace that would lead the reply.
+                if not new_content:
+                    content = content.lstrip()
+                if content:
+                    new_content.append(content)
+                    yield content
+
+        except httpx.HTTPError as e:
+            content = f"\nException: {e!r}"
+            new_content.append(content)
+            yield content
+
+        session.session.extend([User(query), Assistant("".join(new_content))])
+
+    def ask(self, session, query, *, max_query_size=5, timeout=60):
+        """Blocking variant of ``aask``; returns the complete reply text.
+
+        The reply is read back from the ``content`` of the last entry that
+        ``aask`` appended to ``session.session``.
+        """
+
+        async def drain():
+            # aask() is an async generator, and asyncio.run() only accepts a
+            # coroutine, so the iteration has to be wrapped in one.
+            async for _ in self.aask(
+                session, query, max_query_size=max_query_size, timeout=timeout
+            ):
+                pass
+
+        asyncio.run(drain())
+        return session.session[-1].content
+
+    @classmethod
+    def get_key(cls):
+        """Return the API token stored under ``huggingface_api_token``."""
+        return get_key("huggingface_api_token")
+
+
+def factory():
+    """Uses the huggingface text-generation-interface web API"""
+    # Build the back-end with its default parameters and hand it back.
+    backend = HuggingFace()
+    return backend
--- a/src/chap/backends/llama_cpp.py
+++ b/src/chap/backends/llama_cpp.py
@ -17,6 +17,11 @@ class LlamaCpp:
        url: str = "http://localhost:8080/completion"
        """The URL of a llama.cpp server's completion endpoint."""

+        start_prompt: str = """<s>[INST] <<SYS>>\n"""
+        after_system: str = "\n<</SYS>>\n\n"
+        after_user: str = """ [/INST] """
+        after_assistant: str = """ </s><s>[INST] """
+
    def __init__(self):
        self.parameters = self.Parameters()

@ -26,29 +31,30 @@ A dialog, where USER interacts with AI. AI is helpful, kind, obedient, honest, a

    def make_full_query(self, messages, max_query_size):
        del messages[1:-max_query_size]
-        rows = []
+        result = [self.parameters.start_prompt]
        for m in messages:
            content = (m.content or "").strip()
            if not content:
                continue
+            result.append(content)
            if m.role == "system":
-                rows.append(f"ASSISTANT'S RULE: {content}\n")
+                result.append(self.parameters.after_system)
            elif m.role == "assistant":
-                rows.append(f"ASSISTANT: {content}\n")
+                result.append(self.parameters.after_assistant)
            elif m.role == "user":
-                rows.append(f"USER: {content}")
-        rows.append("ASSISTANT: ")
-        full_query = ("\n".join(rows)).rstrip()
+                result.append(self.parameters.after_user)
+        full_query = "".join(result)
        return full_query

    async def aask(
-        self, session, query, *, max_query_size=5, timeout=60
+        self, session, query, *, max_query_size=5, timeout=180
    ):  # pylint: disable=unused-argument,too-many-locals,too-many-branches
        params = {
            "prompt": self.make_full_query(
                session.session + [User(query)], max_query_size
            ),
            "stream": True,
+            "stop": ["</s>", "<s>", "[INST]"],
        }
        new_content = []
        try:
--- a/src/chap/commands/tui.css
+++ b/src/chap/commands/tui.css
@ -4,6 +4,15 @@
 * SPDX-License-Identifier: MIT
 */

+/* User and assistant messages carrying the "history_exclude" class use the
+ * disabled text colour and a dashed left border in the primary colour. */
+.role_user.history_exclude, .role_assistant.history_exclude {
+    color: $text-disabled;
+    border-left: dashed $primary;
+}
+/* While focus is within an excluded assistant message, the dashed border
+ * switches to the secondary colour. */
+.role_assistant.history_exclude:focus-within {
+    color: $text-disabled;
+    border-left: dashed $secondary;
+}
+
 .role_system {
    text-style: italic;
    color: $text-muted;
--- a/src/chap/commands/tui.py
+++ b/src/chap/commands/tui.py
@ -29,6 +29,7 @@ class Markdown(
        Binding("ctrl+y", "yank", "Yank text", show=True),
        Binding("ctrl+r", "resubmit", "resubmit", show=True),
        Binding("ctrl+x", "delete", "delete to end", show=True),
+        Binding("ctrl+q", "toggle_history", "history toggle", show=True),
    ]


@ -43,7 +44,7 @@ def markdown_for_step(step):
 class Tui(App):
    CSS_PATH = "tui.css"
    BINDINGS = [
-        Binding("ctrl+q", "app.quit", "Quit", show=True, priority=True),
+        Binding("ctrl+c", "app.quit", "Quit", show=True, priority=True),
    ]

    def __init__(self, api=None, session=None):
@ -82,6 +83,12 @@ class Tui(App):
        tokens = []
        update = asyncio.Queue(1)

+        # Construct a fake session with only select items
+        session = Session()
+        for si, wi in zip(self.session.session, self.container.children):
+            if not wi.has_class("history_exclude"):
+                session.session.append(si)
+
        async def render_fun():
            while await update.get():
                if tokens:
@ -90,7 +97,7 @@ class Tui(App):
                await asyncio.sleep(0.1)

        async def get_token_fun():
-            async for token in self.api.aask(self.session, event.value):
+            async for token in self.api.aask(session, event.value):
                tokens.append(token)
                try:
                    update.put_nowait(True)
@ -102,6 +109,7 @@ class Tui(App):
            await asyncio.gather(render_fun(), get_token_fun())
            self.input.value = ""
        finally:
+            self.session.session.extend(session.session[-2:])
            all_output = self.session.session[-1].content
            output.update(all_output)
            output._markdown = all_output  # pylint: disable=protected-access
@ -118,6 +126,19 @@ class Tui(App):
            content = widget._markdown  # pylint: disable=protected-access
            subprocess.run(["xsel", "-ib"], input=content.encode("utf-8"), check=False)

+    def action_toggle_history(self):
+        """Toggle whether the focused exchange is excluded from history.
+
+        Starting at the focused ``Markdown`` widget, walk backwards to the
+        nearest widget carrying the ``role_user`` class (never going below
+        index 1), then flip the ``history_exclude`` class on that widget and
+        on the one that follows it.  Does nothing when the focused widget is
+        not a ``Markdown``.
+        """
+        widget = self.focused
+        if not isinstance(widget, Markdown):
+            return
+        children = self.container.children
+        idx = children.index(widget)
+        while idx > 1 and not children[idx].has_class("role_user"):
+            idx -= 1
+
+        # Flip the pair of widgets; the follower may not exist, so only
+        # positions that are actually present are touched.
+        for pos in (idx, idx + 1):
+            if pos < len(children):
+                children[pos].toggle_class("history_exclude")
+
    async def action_resubmit(self):
        await self.delete_or_resubmit(True)

@ -130,7 +151,7 @@ class Tui(App):
            return
        children = self.container.children
        idx = children.index(widget)
-        while idx > 1 and not "role_user" in children[idx].classes:
+        while idx > 1 and not children[idx].has_class("role_user"):
            idx -= 1
        widget = children[idx]

--- a/src/chap/core.py
+++ b/src/chap/core.py
@ -134,7 +134,7 @@ def format_backend_help(api, formatter):
            doc = get_attribute_docstring(type(api.parameters), f.name).docstring_below
            if doc:
                doc += " "
-            doc += f"(Default: {default})"
+            doc += f"(Default: {default!r})"
            rows.append((f"-B {name}:{f.type.__name__.upper()}", doc))
        formatter.write_dl(rows)