diff --git a/src/chap/backends/llama_cpp.py b/src/chap/backends/llama_cpp.py
index 4854b43..e0ae2b8 100644
--- a/src/chap/backends/llama_cpp.py
+++ b/src/chap/backends/llama_cpp.py
@@ -17,6 +17,11 @@ class LlamaCpp:
         url: str = "http://localhost:8080/completion"
         """The URL of a llama.cpp server's completion endpoint."""
 
+        start_prompt: str = """[INST] <<SYS>>\n"""
+        after_system: str = "\n<</SYS>>\n\n"
+        after_user: str = """ [/INST] """
+        after_assistant: str = """ [INST] """
+
     def __init__(self):
         self.parameters = self.Parameters()
 
@@ -26,19 +31,19 @@ A dialog, where USER interacts with AI. AI is helpful, kind, obedient, honest, a
 
     def make_full_query(self, messages, max_query_size):
         del messages[1:-max_query_size]
-        rows = []
+        result = [self.parameters.start_prompt]
         for m in messages:
             content = (m.content or "").strip()
             if not content:
                 continue
+            result.append(content)
             if m.role == "system":
-                rows.append(f"ASSISTANT'S RULE: {content}\n")
+                result.append(self.parameters.after_system)
             elif m.role == "assistant":
-                rows.append(f"ASSISTANT: {content}\n")
+                result.append(self.parameters.after_assistant)
             elif m.role == "user":
-                rows.append(f"USER: {content}")
-        rows.append("ASSISTANT: ")
-        full_query = ("\n".join(rows)).rstrip()
+                result.append(self.parameters.after_user)
+        full_query = "".join(result)
         return full_query
 
     async def aask(
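
For reviewers, here is a minimal standalone sketch of what the rewritten `make_full_query` now emits: a Llama 2 instruct-style prompt built by joining the configurable delimiter strings instead of the old `USER:`/`ASSISTANT:` rows. The `Message` and `Parameters` classes below are simplified stand-ins for chap's real types, and the default prompt strings mirror the patch; this is illustrative only, not part of the change.

```python
from dataclasses import dataclass


@dataclass
class Message:
    """Stand-in for chap's session message objects (role + content)."""
    role: str
    content: str


@dataclass
class Parameters:
    """Stand-in for LlamaCpp.Parameters, with the patch's defaults."""
    start_prompt: str = "[INST] <<SYS>>\n"
    after_system: str = "\n<</SYS>>\n\n"
    after_user: str = " [/INST] "
    after_assistant: str = " [INST] "


def make_full_query(messages, max_query_size, parameters=None):
    parameters = parameters or Parameters()
    # As in the patch: keep the leading system message plus the
    # last max_query_size messages (mutates the caller's list).
    del messages[1:-max_query_size]
    result = [parameters.start_prompt]
    for m in messages:
        content = (m.content or "").strip()
        if not content:
            continue
        # Each message contributes its content followed by the
        # delimiter that introduces the *next* speaker's turn.
        result.append(content)
        if m.role == "system":
            result.append(parameters.after_system)
        elif m.role == "assistant":
            result.append(parameters.after_assistant)
        elif m.role == "user":
            result.append(parameters.after_user)
    return "".join(result)


print(make_full_query(
    [
        Message("system", "You are a helpful assistant."),
        Message("user", "Hello!"),
    ],
    max_query_size=5,
))
# Output (note the trailing space after [/INST], where the model's
# completion begins):
# [INST] <<SYS>>
# You are a helpful assistant.
# <</SYS>>
#
# Hello! [/INST]
```

Because the final `[/INST]` delimiter is appended inside the loop rather than as a hardcoded `"ASSISTANT: "` suffix, the same loop handles multi-turn histories: each prior assistant reply is followed by `after_assistant` (`" [INST] "`), reopening an instruction block for the next user message.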