Use llama2-instruct style prompting

This also works well with mistral-7b-instruct.

See https://github.com/facebookresearch/llama/blob/v2/llama/generation.py
This commit is contained in:
Jeff Epler 2023-09-29 08:32:41 -05:00
parent 9fe01de170
commit ea03aa0f20
No known key found for this signature in database
GPG key ID: D5BF15AB975AB4DE

View file

@@ -17,6 +17,11 @@ class LlamaCpp:
# llama.cpp example-server completion route; POSTed to by the client code.
url: str = "http://localhost:8080/completion"
"""The URL of a llama.cpp server's completion endpoint."""
# Llama-2-instruct prompt delimiters (see Meta's llama v2 generation.py);
# the same format works for mistral-7b-instruct per the commit message.
# Opens the conversation and the system block.
start_prompt: str = """<s>[INST] <<SYS>>\n"""
# Closes the system block before the first user message.
after_system: str = "\n<</SYS>>\n\n"
# Follows each user message; closes the [INST] span so the model replies.
after_user: str = """ [/INST] """
# Follows each assistant reply; ends the turn and opens the next [INST].
after_assistant: str = """ </s><s>[INST] """
# NOTE(review): this is a rendered diff view — original indentation was
# stripped, so the fragment below is not directly runnable as shown.
def __init__(self):
self.parameters = self.Parameters()
@@ -26,19 +31,19 @@ A dialog, where USER interacts with AI. AI is helpful, kind, obedient, honest, a
# NOTE(review): this span was a rendered diff that interleaved the pre-commit
# ("USER:/ASSISTANT:" rows) and post-commit (llama2-instruct delimiter) bodies.
# Reconstructed below is the coherent post-commit implementation.
def make_full_query(self, messages, max_query_size):
    """Render *messages* into one llama2-instruct prompt string.

    Keeps the first message (the system prompt) plus the last
    `max_query_size` messages, dropping everything in between.
    NOTE(review): this mutates the caller's `messages` list in place,
    and with max_query_size == 0 the slice [1:-0] == [1:0] deletes
    nothing — confirm callers always pass a positive size.

    Each non-empty message's content is followed by the delimiter for
    its role (after_system / after_assistant / after_user), so a prompt
    ending in a user message ends with " [/INST] ", cueing the model
    to generate the assistant reply.
    """
    del messages[1:-max_query_size]
    result = [self.parameters.start_prompt]
    for m in messages:
        content = (m.content or "").strip()
        if not content:
            continue  # skip blank messages entirely (no stray delimiters)
        result.append(content)
        if m.role == "system":
            result.append(self.parameters.after_system)
        elif m.role == "assistant":
            result.append(self.parameters.after_assistant)
        elif m.role == "user":
            result.append(self.parameters.after_user)
    full_query = "".join(result)
    return full_query
async def aask(