diff --git a/src/chap/backends/llama_cpp.py b/src/chap/backends/llama_cpp.py
index 4854b43..e0ae2b8 100644
--- a/src/chap/backends/llama_cpp.py
+++ b/src/chap/backends/llama_cpp.py
@@ -17,6 +17,11 @@ class LlamaCpp:
url: str = "http://localhost:8080/completion"
"""The URL of a llama.cpp server's completion endpoint."""
+ start_prompt: str = """[INST] <<SYS>>\n"""
+ after_system: str = "\n<</SYS>>\n\n"
+ after_user: str = """ [/INST] """
+ after_assistant: str = """ [INST] """
+
def __init__(self):
self.parameters = self.Parameters()
@@ -26,19 +31,19 @@ A dialog, where USER interacts with AI. AI is helpful, kind, obedient, honest, a

def make_full_query(self, messages, max_query_size):
del messages[1:-max_query_size]
- rows = []
+ result = [self.parameters.start_prompt]
for m in messages:
content = (m.content or "").strip()
if not content:
continue
+ result.append(content)
if m.role == "system":
- rows.append(f"ASSISTANT'S RULE: {content}\n")
+ result.append(self.parameters.after_system)
elif m.role == "assistant":
- rows.append(f"ASSISTANT: {content}\n")
+ result.append(self.parameters.after_assistant)
elif m.role == "user":
- rows.append(f"USER: {content}")
- rows.append("ASSISTANT: ")
- full_query = ("\n".join(rows)).rstrip()
+ result.append(self.parameters.after_user)
+ full_query = "".join(result)
return full_query

async def aask(
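
For readers unfamiliar with the Llama-2 instruction format, here is a minimal standalone sketch of what the rewritten `make_full_query` produces with the new delimiter defaults, and of how the result might be posted to the server's `/completion` endpoint. The `Message` class, the `post_completion` helper, and the use of `requests` are simplified stand-ins for illustration, not chap's actual types or HTTP client; the endpoint fields (`prompt`, `n_predict`, `stream`) follow llama.cpp's server API.

```python
from dataclasses import dataclass

import requests  # stand-in; chap itself uses an async HTTP client


@dataclass
class Message:
    """Simplified stand-in for chap's message type."""

    role: str
    content: str


# Mirrors the new Parameters defaults (Llama-2 [INST]/<<SYS>> format).
START_PROMPT = "[INST] <<SYS>>\n"
AFTER = {
    "system": "\n<</SYS>>\n\n",
    "user": " [/INST] ",
    "assistant": " [INST] ",
}


def make_full_query(messages: list[Message], max_query_size: int) -> str:
    # Keep the system message (index 0) plus the most recent turns.
    del messages[1:-max_query_size]
    result = [START_PROMPT]
    for m in messages:
        content = (m.content or "").strip()
        if not content:
            continue
        # Each turn is its content followed by the role's delimiter.
        result.append(content)
        result.append(AFTER.get(m.role, ""))
    return "".join(result)


def post_completion(url: str, prompt: str) -> str:
    """Hypothetical helper: blocking POST to a llama.cpp server."""
    resp = requests.post(
        url,
        json={"prompt": prompt, "n_predict": 128, "stream": False},
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()["content"]


messages = [
    Message("system", "You are a helpful assistant."),
    Message("user", "What is the capital of France?"),
]
prompt = make_full_query(messages, 4)
print(prompt)
# [INST] <<SYS>>
# You are a helpful assistant.
# <</SYS>>
#
# What is the capital of France? [/INST]

# Requires a running server, e.g. `llama-server -m model.gguf --port 8080`:
# print(post_completion("http://localhost:8080/completion", prompt))
```

Note that the assembled prompt intentionally ends just after `[/INST]`, leaving the model positioned to generate the next assistant turn. Whether `<s>`/`</s>` tokens also need to appear in the prompt text depends on the server's tokenizer settings, so these delimiter defaults are a starting point rather than a complete chat template.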