update llama backend for llama-3-instruct style

This commit is contained in:
Jeff Epler 2024-06-23 14:45:03 -05:00
parent aa1cb90c47
commit 415e48496a

View file

@@ -18,10 +18,16 @@ class LlamaCpp(AutoAskMixin):
     url: str = "http://localhost:8080/completion"
     """The URL of a llama.cpp server's completion endpoint."""
-    start_prompt: str = "<s>"
-    system_format: str = "<<SYS>>{}<</SYS>>"
-    user_format: str = " [INST] {} [/INST]"
-    assistant_format: str = " {}</s>"
+    start_prompt: str = "<|begin_of_text|>"
+    system_format: str = (
+        "<|start_header_id|>system<|end_header_id|>\n\n{}<|eot_id|>"
+    )
+    user_format: str = "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|>"
+    assistant_format: str = (
+        "<|start_header_id|>assistant<|end_header_id|>\n\n{}<|eot_id|>"
+    )
+    end_prompt: str = "<|start_header_id|>assistant<|end_header_id|>\n\n"
+    stop: str | None = None

     def __init__(self) -> None:
         super().__init__()
@@ -59,7 +65,7 @@ A dialog, where USER interacts with AI. AI is helpful, kind, obedient, honest, and knows its own limits.
         params = {
             "prompt": self.make_full_query(session + [User(query)], max_query_size),
             "stream": True,
-            "stop": ["</s>", "<s>", "[INST]"],
+            "stop": ["</s>", "<s>", "[INST]", "<|eot_id|>"],
         }
         new_content: list[str] = []
         try: