Merge pull request #39 from jepler/misc-updates

This commit is contained in:
Jeff Epler 2024-10-22 07:57:56 -05:00 committed by GitHub
commit 8f126d3516
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 17 additions and 5 deletions

View file

@@ -51,7 +51,6 @@ A dialog, where USER interacts with AI. AI is helpful, kind, obedient, honest, a
continue continue
result.append(formats[m.role].format(content)) result.append(formats[m.role].format(content))
full_query = "".join(result) full_query = "".join(result)
print("fq", full_query)
return full_query return full_query
async def aask( async def aask(
@@ -102,5 +101,10 @@ A dialog, where USER interacts with AI. AI is helpful, kind, obedient, honest, a
def factory() -> Backend: def factory() -> Backend:
"""Uses the llama.cpp completion web API""" """Uses the llama.cpp completion web API
Note: Consider using the openai-chatgpt backend with a custom URL instead.
The llama.cpp server will automatically apply common chat templates with the
openai-chatgpt backend, while chat templates must be manually configured client side
with this backend."""
return LlamaCpp() return LlamaCpp()

View file

@@ -66,14 +66,20 @@ class EncodingMeta:
class ChatGPT: class ChatGPT:
@dataclass @dataclass
class Parameters: class Parameters:
model: str = "gpt-4-turbo" model: str = "gpt-4o-mini"
"""The model to use. The most common alternative value is 'gpt-3.5-turbo'.""" """The model to use. The most common alternative value is 'gpt-4o'."""
max_request_tokens: int = 1024 max_request_tokens: int = 1024
"""The approximate greatest number of tokens to send in a request. When the session is long, the system prompt and 1 or more of the most recent interaction steps are sent.""" """The approximate greatest number of tokens to send in a request. When the session is long, the system prompt and 1 or more of the most recent interaction steps are sent."""
url: str = "https://api.openai.com/v1/chat/completions" url: str = "https://api.openai.com/v1/chat/completions"
"""The URL of a chatgpt-pcompatible server's completion endpoint.""" """The URL of a chatgpt-compatible server's completion endpoint. Notably, llama.cpp's server is compatible with this backend, and can automatically apply common chat templates too."""
temperature: float | None = None
"""The model temperature for sampling"""
top_p: float | None = None
"""The model temperature for sampling"""
def __init__(self) -> None: def __init__(self) -> None:
self.parameters = self.Parameters() self.parameters = self.Parameters()
@@ -135,6 +141,8 @@ class ChatGPT:
headers={"authorization": f"Bearer {self.get_key()}"}, headers={"authorization": f"Bearer {self.get_key()}"},
json={ json={
"model": self.parameters.model, "model": self.parameters.model,
"temperature": self.parameters.temperature,
"top_p": self.parameters.top_p,
"stream": True, "stream": True,
"messages": session_to_list(full_prompt), "messages": session_to_list(full_prompt),
}, },