mf1
This commit is contained in:
@@ -1,19 +0,0 @@
|
||||
#!/usr/bin/env bash
# Force nix-direnv to rebuild its cache for one project directory, then
# refresh the timestamps of .envrc and the cached profile rc files.
set -e

# Single source of truth for the project location used by every step below.
project_dir="/home/nx2/test-small-llms"

if [[ ! -d "$project_dir" ]]; then
  echo "Cannot find source directory; Did you move it?"
  echo "(Looking for ${project_dir})"
  echo 'Cannot force reload with this script - use "direnv reload" manually and then try again'
  exit 1
fi

# rebuild the cache forcefully
_nix_direnv_force_reload=1 direnv exec "$project_dir" true

# Update the mtime for .envrc.
# This will cause direnv to reload again - but without re-building.
touch "$project_dir/.envrc"

# Also update the timestamp of whatever profile_rc we have.
# This makes sure that we know we are up to date.
# nullglob makes an unmatched pattern expand to nothing; without it the
# literal string ".direnv/*.rc" would be handed to touch, which would then
# create a file named "*.rc" (or fail and abort the script under `set -e`).
shopt -s nullglob
profile_rcs=("$project_dir/.direnv"/*.rc)
shopt -u nullglob
if (( ${#profile_rcs[@]} > 0 )); then
  touch -r "$project_dir/.envrc" "${profile_rcs[@]}"
fi
|
||||
@@ -1 +0,0 @@
|
||||
/nix/store/j2vf461mp9h2y9awkklbfawf3dz7cs1p-nix-shell-env
|
||||
File diff suppressed because it is too large
Load Diff
@@ -20,11 +20,10 @@ from langchain_core.language_models import LanguageModelInput
|
||||
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage, BaseMessage, ToolCall
|
||||
from langchain_core.outputs import ChatGeneration, ChatResult
|
||||
from langchain_core.prompts import SystemMessagePromptTemplate
|
||||
from langchain_core.pydantic_v1 import BaseModel
|
||||
from langchain_core.pydantic_v1 import BaseModel
|
||||
from langchain_core.runnables import Runnable
|
||||
from langchain_core.tools import BaseTool, Tool
|
||||
from langchain_core.utils.pydantic import is_basemodel_instance, is_basemodel_subclass
|
||||
from textwrap import dedent
|
||||
|
||||
from libs.functions import nxhash
|
||||
|
||||
@@ -98,14 +97,15 @@ def _is_pydantic_class(obj: Any) -> bool:
|
||||
is_basemodel_subclass(obj) or BaseModel in obj.__bases__
|
||||
)
|
||||
|
||||
class OllamaFunctions(ChatOllama):
|
||||
class OllamaFunctionsBase(ChatOllama):
|
||||
"""Function chat model that uses Ollama API."""
|
||||
|
||||
tool_system_prompt_template: str = DEFAULT_SYTEM_PROMPT
|
||||
tool_system_prompt_template_with_history: str = DEFAULT_SYTEM_PROMPT_WITH_HISTORY
|
||||
max_tool_call_fails: int = 5
|
||||
|
||||
def __init__(self, max_tool_call_fails, **kwargs: Any) -> None:
|
||||
def __init__(self, **kwargs: Any) -> None:
|
||||
|
||||
super().__init__(**kwargs)
|
||||
|
||||
def bind_tools(
|
||||
@@ -115,6 +115,8 @@ class OllamaFunctions(ChatOllama):
|
||||
) -> Runnable[LanguageModelInput, BaseMessage]:
|
||||
return self.bind(functions=tools, **kwargs)
|
||||
|
||||
def _get_final_message(self, messages: list, functions_str: str) -> list:
|
||||
raise NotImplementedError
|
||||
|
||||
def _generate(self, messages: List[BaseMessage], stop: Optional[List[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any) -> ChatResult:
|
||||
def _convert_to_ollama_tool(self, tool: Any) -> Dict:
|
||||
@@ -177,11 +179,11 @@ class OllamaFunctions(ChatOllama):
|
||||
return called_tool
|
||||
|
||||
def _extract_conversaional_response(self, d: dict) -> str:
|
||||
if ("tool_input" in d and "response" in d["tool_input"]):
|
||||
if ("tool_input" in d and d["tool_input"] and "response" in d["tool_input"]):
|
||||
response = d["tool_input"]["response"]
|
||||
elif ("input" in d and "response" in d["input"]):
|
||||
elif ("input" in d and d["input"] and "response" in d["input"]):
|
||||
response = d["input"]["response"]
|
||||
elif ("args" in d and "response" in d["args"]):
|
||||
elif ("args" in d and d["args"] and "response" in d["args"]):
|
||||
response = d["args"]["response"]
|
||||
elif "response" in d:
|
||||
response = d["response"]
|
||||
@@ -220,66 +222,6 @@ class OllamaFunctions(ChatOllama):
|
||||
called_tool_args = {}
|
||||
return called_tool_args
|
||||
|
||||
def _get_final_message(self, messages: list, functions_str: str) -> list:
|
||||
def _get_system_msg_and_formatted_history(self, messages: list) -> Tuple[str, str]:
|
||||
def _format_tools_for_history(tool_calls: list[ToolCall]) -> str:
|
||||
call_list = []
|
||||
for c in tool_calls:
|
||||
call_list.append({
|
||||
"id": nxhash(c['id'])[-4:],
|
||||
"tool": c['name'],
|
||||
"args": c['args']
|
||||
})
|
||||
if len(call_list) == 1:
|
||||
return json.dumps(obj=call_list[0], ensure_ascii=False, indent=2)
|
||||
else:
|
||||
return json.dumps(obj=call_list, ensure_ascii=False, indent=2)
|
||||
formated_history = ""
|
||||
system_msg = messages[0]
|
||||
for m in messages[1:]:
|
||||
|
||||
if formated_history != "":
|
||||
formated_history += "\n\n"
|
||||
|
||||
if isinstance(m, SystemMessage):
|
||||
formated_history += "The system provided the info:\n" + str(m.content)
|
||||
elif isinstance(m, HumanMessage):
|
||||
formated_history += "The Human said:\n" + str(m.content)
|
||||
elif isinstance(m, AIMessage) and m.tool_calls:
|
||||
formated_history += "So you called the tool" + (":\n" if len(m.tool_calls) == 1 else "s:\n") + _format_tools_for_history(m.tool_calls)
|
||||
elif isinstance(m, ToolMessage):
|
||||
formated_history += "To which the tool (" + nxhash(m.tool_call_id)[-4:] + ") replied with:\n" + str(m.content)
|
||||
elif isinstance(m, AIMessage) and not m.tool_calls:
|
||||
formated_history += "You said:\n" + str(m.content)
|
||||
else:
|
||||
raise TypeError("OllamaFunctions only supports SystemMessage HumanMessage ToolMessage AIMessage but got " + str(type(m)))
|
||||
|
||||
return system_msg, formated_history
|
||||
|
||||
# prepare generation with history
|
||||
if True in [ isinstance(m, ToolMessage) for m in messages ]:
|
||||
system_msg, formated_history = _get_system_msg_and_formatted_history(self, messages=messages)
|
||||
|
||||
system_message_prompt_template = SystemMessagePromptTemplate.from_template(self.tool_system_prompt_template_with_history)
|
||||
system_message = system_message_prompt_template.format(
|
||||
tools=functions_str,
|
||||
history=formated_history,
|
||||
system_msg=system_msg
|
||||
)
|
||||
final_messages = [ system_message ]
|
||||
|
||||
# prepare generation without history
|
||||
else:
|
||||
system_message_prompt_template = SystemMessagePromptTemplate.from_template(self.tool_system_prompt_template)
|
||||
system_message = system_message_prompt_template.format(
|
||||
tools=functions_str
|
||||
)
|
||||
final_messages = [ system_message ] + messages
|
||||
|
||||
return final_messages
|
||||
|
||||
|
||||
|
||||
|
||||
def gen(self, failed_tool_calls: int, messages: list) -> ChatResult:
|
||||
|
||||
@@ -289,7 +231,7 @@ class OllamaFunctions(ChatOllama):
|
||||
functions_str = json.dumps(functions_list, indent=2)
|
||||
|
||||
# get messages to prompt with
|
||||
final_messages = _get_final_message(self, messages=messages, functions_str=functions_str)
|
||||
final_messages = self._get_final_message(messages=messages, functions_str=functions_str)
|
||||
|
||||
# generate chat result
|
||||
response_message = super()._generate(final_messages, stop=stop, run_manager=run_manager, **kwargs)
|
||||
@@ -329,6 +271,125 @@ class OllamaFunctions(ChatOllama):
|
||||
return gen(self, failed_tool_calls=0, messages=messages)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class OllamaFunctionsLSM(OllamaFunctionsBase):
|
||||
"""Function chat model that uses Ollama API."""
|
||||
|
||||
def _get_final_message(self, messages: list, functions_str: str) -> list:
|
||||
def _get_system_msg_and_formatted_history(self, messages: list) -> Tuple[str, str]:
|
||||
def _format_tools_for_history(tool_calls: list[ToolCall]) -> str:
|
||||
call_list = []
|
||||
for c in tool_calls:
|
||||
call_list.append({
|
||||
"id": nxhash(c['id'])[-4:],
|
||||
"tool": c['name'],
|
||||
"args": c['args']
|
||||
})
|
||||
if len(call_list) == 1:
|
||||
return json.dumps(obj=call_list[0], ensure_ascii=False, indent=2)
|
||||
else:
|
||||
return json.dumps(obj=call_list, ensure_ascii=False, indent=2)
|
||||
formated_history = ""
|
||||
system_msg = messages[0]
|
||||
for m in messages[1:]:
|
||||
|
||||
if formated_history != "":
|
||||
formated_history += "\n\n"
|
||||
|
||||
if isinstance(m, SystemMessage):
|
||||
formated_history += "The system provided the info:\n" + str(m.content)
|
||||
elif isinstance(m, HumanMessage):
|
||||
formated_history += "The Human said:\n" + str(m.content)
|
||||
elif isinstance(m, AIMessage) and m.tool_calls:
|
||||
formated_history += "So you called the tool" + (":\n" if len(m.tool_calls) == 1 else "s:\n") + _format_tools_for_history(m.tool_calls)
|
||||
elif isinstance(m, ToolMessage):
|
||||
formated_history += "To which the tool (" + nxhash(m.tool_call_id)[-4:] + ") replied with:\n" + str(m.content)
|
||||
elif isinstance(m, AIMessage) and not m.tool_calls:
|
||||
formated_history += "You said:\n" + str(m.content)
|
||||
else:
|
||||
try:
|
||||
raise TypeError("OllamaFunctions only supports SystemMessage HumanMessage ToolMessage AIMessage but got " + str(type(m)))
|
||||
except NameError:
|
||||
raise TypeError("OllamaFunctions only supports SystemMessage HumanMessage ToolMessage AIMessage.")
|
||||
|
||||
|
||||
return system_msg, formated_history
|
||||
|
||||
# prepare generation with history
|
||||
if True in [ isinstance(m, ToolMessage) for m in messages ]:
|
||||
system_msg, formated_history = _get_system_msg_and_formatted_history(self, messages=messages)
|
||||
|
||||
system_message_prompt_template = SystemMessagePromptTemplate.from_template(self.tool_system_prompt_template_with_history)
|
||||
system_message = system_message_prompt_template.format(
|
||||
tools=functions_str,
|
||||
history=formated_history,
|
||||
system_msg=system_msg
|
||||
)
|
||||
final_messages = [ system_message ]
|
||||
|
||||
# prepare generation without history
|
||||
else:
|
||||
system_message_prompt_template = SystemMessagePromptTemplate.from_template(self.tool_system_prompt_template)
|
||||
system_message = system_message_prompt_template.format(
|
||||
tools=functions_str
|
||||
)
|
||||
final_messages = [ system_message ] + messages
|
||||
|
||||
return final_messages
|
||||
|
||||
@property
|
||||
def _llm_type(self) -> str:
|
||||
return "ollama_functions"
|
||||
return "ollama_functions_lsm"
|
||||
|
||||
|
||||
|
||||
class OllamaFunctionsT2S(OllamaFunctionsBase):
    """Function chat model that uses Ollama API.

    "T2S" variant: when the conversation already contains tool results, each
    ToolMessage is rewritten as a SystemMessage and each AIMessage carrying
    tool calls is rewritten as an AIMessage whose content is the JSON form of
    those calls. Every other message is passed through unchanged.
    """

    def _get_final_message(self, messages: list, functions_str: str) -> list:
        """Build the message list that is sent to the underlying chat model.

        Args:
            messages: The conversation so far.
            functions_str: JSON description of the bound tools; it is
                substituted for ``{tools}`` in ``tool_system_prompt_template``.

        Returns:
            The formatted tool system message followed by the (possibly
            transformed) conversation messages.
        """
        # Both the "with history" and "without history" paths use the same
        # template, so the system message is built once up front.
        system_message = SystemMessagePromptTemplate.from_template(
            self.tool_system_prompt_template
        ).format(tools=functions_str)

        # prepare generation without history
        if not any(isinstance(m, ToolMessage) for m in messages):
            return [system_message] + messages

        # prepare generation with history
        transformed_messages = []
        for m in messages:
            if isinstance(m, ToolMessage):
                transformed_messages.append(SystemMessage(content=(
                    f"The Tool '{m.name}' replied with:" + "\n" + str(m.content)
                )))
            elif isinstance(m, AIMessage) and m.tool_calls:
                calls = [
                    {"tool": call['name'], "tool_input": call['args']}
                    for call in m.tool_calls
                ]
                # A single call is serialized as one object, several as a list.
                payload = calls[0] if len(calls) == 1 else calls
                transformed_messages.append(AIMessage(content=json.dumps(payload)))
            else:
                # Human/system messages AND AI messages without tool calls are
                # kept as-is, so no part of the history is silently dropped.
                transformed_messages.append(m)

        return [system_message] + transformed_messages

    @property
    def _llm_type(self) -> str:
        """Identifier string for this model variant."""
        return "ollama_functions_t2s"
|
||||
|
||||
@@ -79,8 +79,8 @@ def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test]
|
||||
'technique_name': technique.name,
|
||||
})
|
||||
|
||||
# if hash_key == "DE3D137E":
|
||||
# pass
|
||||
if hash_key == "0DEB2030":
|
||||
pass
|
||||
|
||||
if hash_key not in saved_results.keys():
|
||||
try:
|
||||
@@ -105,7 +105,7 @@ def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test]
|
||||
"\033[0;35m)\033[0m",
|
||||
end=""
|
||||
)
|
||||
answer = test.runnable(model=model, seed=seed, test=test, base_url=base_url)
|
||||
answer = test.runnable(model=model, seed=seed, test=test, technique=technique, base_url=base_url)
|
||||
if isinstance(answer, str):
|
||||
combination['answer'] = answer
|
||||
# combination['tool_calls'] = [] # no entry
|
||||
@@ -172,13 +172,15 @@ def run_tests(models: dict[int, Model], seeds: list[int], tests: dict[int, Test]
|
||||
|
||||
try:
|
||||
entry = {
|
||||
'test_name': result['test_name'],
|
||||
'test_id': result['test_id'],
|
||||
'model_name': result['model_name'],
|
||||
'model_id': result['model_id'],
|
||||
'seed': result['seed'],
|
||||
'answer': result['answer'],
|
||||
'validation': result['test'].validator(test=result['test'], answer=result['answer'], base_url=base_url),
|
||||
'test_name': result['test_name'],
|
||||
'test_id': result['test_id'],
|
||||
'model_name': result['model_name'],
|
||||
'model_id': result['model_id'],
|
||||
'technique_name': result['technique_name'],
|
||||
'technique_id': result['technique_id'],
|
||||
'seed': result['seed'],
|
||||
'answer': result['answer'],
|
||||
'validation': result['test'].validator(test=result['test'], answer=result['answer'], base_url=base_url),
|
||||
}
|
||||
except Exception as e:
|
||||
print("\033[0;31mError validating entry (\033[0m" + hash_key + "\033[0;31m). <\033[0m" + str(e) + "\033[0;31m> Continuing...\033[0m ")
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from types import NoneType
|
||||
from langchain_ollama.chat_models import ChatOllama
|
||||
from libs.ollama_functions import OllamaFunctions
|
||||
from libs.ollama_functions import OllamaFunctionsLSM, OllamaFunctionsT2S
|
||||
from langchain_core.messages import AIMessage, SystemMessage, HumanMessage, ToolMessage
|
||||
from libs.classes import Test, Model
|
||||
from libs.classes import Technique, Test, Model
|
||||
from langchain.tools import Tool
|
||||
from typing import Literal
|
||||
|
||||
@@ -10,22 +10,31 @@ from langgraph.graph import StateGraph, MessagesState
|
||||
import json
|
||||
from pydantic import ValidationError
|
||||
|
||||
def _get_llm(model: Model, base_url: str, seed: int, tools: list[Tool]|NoneType = None):
|
||||
if model.supports_tools:
|
||||
from suite_settings.techniques import techniques
|
||||
|
||||
def _get_llm(model: Model, base_url: str, seed: int, technique: Technique, tools: list[Tool]|NoneType = None):
|
||||
if technique == techniques[1]: # Native
|
||||
llm = ChatOllama(
|
||||
model=model.identifier,
|
||||
seed=seed,
|
||||
base_url=base_url
|
||||
)
|
||||
else:
|
||||
llm = OllamaFunctions(
|
||||
elif technique == techniques[903]: # Long System Message
|
||||
llm = OllamaFunctionsLSM(
|
||||
model=model.identifier,
|
||||
seed=seed,
|
||||
base_url=base_url,
|
||||
format="json",
|
||||
max_tool_call_fails=3,
|
||||
temperature=0.0
|
||||
)
|
||||
elif technique == techniques[572]: # ToolMessages to SystemMessages
|
||||
llm = OllamaFunctionsT2S(
|
||||
model=model.identifier,
|
||||
seed=seed,
|
||||
base_url=base_url,
|
||||
format="json",
|
||||
)
|
||||
else:
|
||||
raise ValueError("Unkown Technique in _get_llm()")
|
||||
|
||||
if tools:
|
||||
llm = llm.bind_tools(tools=tools)
|
||||
@@ -33,7 +42,7 @@ def _get_llm(model: Model, base_url: str, seed: int, tools: list[Tool]|NoneType
|
||||
return llm
|
||||
|
||||
|
||||
def basic_prompt(model: Model, seed: int, test: Test, base_url: str) -> str:
|
||||
def basic_prompt(model: Model, seed: int, test: Test, technique: Technique, base_url: str) -> str:
|
||||
|
||||
messages = [SystemMessage(test.runnable_input['system_msg'])]
|
||||
try:
|
||||
@@ -42,20 +51,20 @@ def basic_prompt(model: Model, seed: int, test: Test, base_url: str) -> str:
|
||||
pass
|
||||
messages += [ HumanMessage(test.runnable_input['human_msg']) ]
|
||||
|
||||
llm = _get_llm(model=model, base_url=base_url, seed=seed)
|
||||
llm = _get_llm(model=model, base_url=base_url, technique=technique, seed=seed)
|
||||
ai_msg = llm.invoke(messages)
|
||||
assert isinstance(ai_msg.content, str)
|
||||
return ai_msg.content
|
||||
|
||||
|
||||
|
||||
def one_tool_call_answer(model: Model, seed: int, test: Test, base_url: str) -> dict:
|
||||
def one_tool_call_answer(model: Model, seed: int, test: Test, technique: Technique, base_url: str) -> dict:
|
||||
|
||||
tools_dict = test.runnable_input['tools']
|
||||
tools = []
|
||||
for key in tools_dict:
|
||||
tools.append(tools_dict[key])
|
||||
llm = _get_llm(model=model, base_url=base_url, seed=seed, tools=tools)
|
||||
llm = _get_llm(model=model, base_url=base_url, seed=seed, technique=technique, tools=tools)
|
||||
|
||||
messages = [SystemMessage(test.runnable_input['system_msg'])]
|
||||
try:
|
||||
@@ -108,7 +117,7 @@ def one_tool_call_answer(model: Model, seed: int, test: Test, base_url: str) ->
|
||||
"tool_calls": tool_calls,
|
||||
}
|
||||
|
||||
def agent_with_tools(model: Model, seed: int, test: Test, base_url: str) -> dict[str, str|list]:
|
||||
def agent_with_tools(model: Model, seed: int, test: Test, technique: Technique, base_url: str) -> dict[str, str|list]:
|
||||
|
||||
tool_calls = []
|
||||
index = -1
|
||||
@@ -173,7 +182,7 @@ def agent_with_tools(model: Model, seed: int, test: Test, base_url: str) -> dict
|
||||
for key in tools_dict:
|
||||
tools.append(tools_dict[key])
|
||||
tool_node = NxToolNode(tools)
|
||||
llm = _get_llm(model=model, base_url=base_url, seed=seed, tools=tools)
|
||||
llm = _get_llm(model=model, base_url=base_url, seed=seed, technique=technique, tools=tools)
|
||||
|
||||
workflow = StateGraph(MessagesState)
|
||||
|
||||
|
||||
@@ -141,6 +141,9 @@ def get_notes_containing(patterns: Union[list[str], str]) -> str:
|
||||
ret += f"{datetime.strftime(entry.time, '%Y/%m/%d %H:%M')} {entry.content}"
|
||||
is_first = False
|
||||
|
||||
if ret == "":
|
||||
ret = "No matching notes were found. Try diffrent patterns."
|
||||
|
||||
return ret
|
||||
|
||||
@tool
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
def print_help() -> None:
    """Print example invocations of print_saved_results.py to stdout."""
    usage_lines = (
        'Example usages:',
        '',
        'python print_saved_results.py',
        'python print_saved_results.py -m llama3.1',
        'python print_saved_results.py -m llama3.1,mixtral-nemo:12b',
        'python print_saved_results.py -m llama3.1 -s 2222,2 -t "Healthy Vegetables in Chinese"',
        '',
        'Note: If one of the "fileters" does not exist, no error is thrown.',
    )
    # One print call, exactly like the original single multi-line literal.
    print("\n".join(usage_lines))
|
||||
|
||||
def _pop_csv_option(args: list[str], flag: str) -> "list[str] | None":
    """Remove ``flag`` and its value from ``args``; return the comma-split value.

    Only the first occurrence of ``flag`` is consumed. Returns ``None`` when
    ``flag`` is absent.

    Raises:
        ValueError: if ``flag`` is the last argument, or its value is empty or
            starts with ``-`` (i.e. looks like another flag).
    """
    if flag not in args:
        return None
    pos = args.index(flag)
    if pos + 1 >= len(args):
        raise ValueError(f"{flag} requires a value")
    value = args[pos + 1]
    if not value or value.startswith("-"):
        raise ValueError(f"invalid value for {flag}: {value!r}")
    del args[pos:pos + 2]
    return value.split(",")


def main(argv: list[str]) -> None:
    """Print the entries of ./saved_results.json, optionally filtered.

    Args:
        argv: Full command line including the script name at index 0.
            Supported options: ``-h`` (help), ``-m`` (models), ``-s`` (seeds)
            and ``-t`` (test names); each filter takes a comma-separated list.
            The list is not modified.

    Exits with status 0 after printing help, and with status 1 when the
    results file cannot be loaded or the arguments are invalid.
    """
    # Local import so the block does not depend on a new file-level import.
    import shutil

    # Help is answered first so it also works before any results exist.
    if "-h" in argv:
        print_help()
        sys.exit(0)

    try:
        with open("./saved_results.json", "r") as f:
            saved_results = json.load(fp=f)
    except FileNotFoundError:
        print("saved_results.json not found. Try running test_suite.py first.")
        sys.exit(1)
    except (OSError, json.JSONDecodeError) as err:
        # Report unreadable/corrupt files as such instead of "not found".
        print(f"saved_results.json could not be read: {err}")
        sys.exit(1)

    # Work on a copy so the caller's list (usually sys.argv) stays intact.
    args = list(argv)
    try:
        models = _pop_csv_option(args, "-m")
        seeds = _pop_csv_option(args, "-s")
        tests = _pop_csv_option(args, "-t")
    except ValueError:
        print("Syntax error. Run `python print_saved_results.py -h` for help.")
        print_help()
        sys.exit(1)

    leftover = args[1:]  # everything except the script name
    if leftover:
        print("Syntax error. Run `python print_saved_results.py -h` for help.")
        print(f"Got unkown argument{'s' if len(leftover) != 1 else ''}: {leftover}")
        print_help()
        sys.exit(1)

    # shutil falls back to a default width when stdout is not a terminal
    # (e.g. piped output), where os.get_terminal_size() raises OSError.
    separator = "-" * shutil.get_terminal_size().columns

    first_print = True
    for result in saved_results.values():
        if models is not None and result['model'] not in models:
            continue
        if seeds is not None and str(result['seed']) not in seeds:
            continue
        if tests is not None and result['test_name'] not in tests:
            continue

        if not first_print:
            print(separator)

        # f-string formatting also copes with answers that are not plain str.
        print(
            "\n"
            f"\033[0;36mTest name:\033[0m {result['test_name']}"
            f"\n\033[0;36mModel:\033[0m {result['model']}"
            f"\n\033[0;36mSeed:\033[0m {result['seed']}"
            f"\n\033[0;36mValidation result:\033[0m {result['validation']}"
            f"\n\033[0;36mAnswer: »\033[0m{result['answer']}\033[0;36m«\033[0m"
            "\n"
        )

        first_print = False
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(argv=sys.argv)
|
||||
1651
saved_results-bak.json
Normal file
1651
saved_results-bak.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,90 +1,90 @@
|
||||
from libs.classes import Model
|
||||
|
||||
models = {
|
||||
245: Model(
|
||||
display_name="llama3.1 8b",
|
||||
identifier="llama3.1",
|
||||
supports_tools=True,
|
||||
parameter_count_in_b=8
|
||||
),
|
||||
238: Model(
|
||||
display_name="llama3.1 70b",
|
||||
identifier="llama3.1:70b",
|
||||
supports_tools=True,
|
||||
parameter_count_in_b=70
|
||||
),
|
||||
120: Model(
|
||||
display_name="llama3 groq TU 8b",
|
||||
identifier="llama3-groq-tool-use",
|
||||
supports_tools=True,
|
||||
parameter_count_in_b=8
|
||||
),
|
||||
890: Model(
|
||||
display_name="llama3 groq TU 70b",
|
||||
identifier="llama3-groq-tool-use:70b",
|
||||
supports_tools=True,
|
||||
parameter_count_in_b=70
|
||||
),
|
||||
348: Model(
|
||||
display_name="Mixtral MoE 8x7b",
|
||||
identifier="mixtral:8x7b",
|
||||
supports_tools=False,
|
||||
parameter_count_in_b=13,
|
||||
),
|
||||
789: Model(
|
||||
display_name="Mixtral MoE 8x22b",
|
||||
identifier="mixtral:8x22b",
|
||||
supports_tools=True,
|
||||
parameter_count_in_b=39
|
||||
),
|
||||
445: Model(
|
||||
display_name="Gemma2 2b",
|
||||
identifier="gemma2:2b",
|
||||
supports_tools=False,
|
||||
parameter_count_in_b=2
|
||||
),
|
||||
475: Model(
|
||||
display_name="Gemma2 9b",
|
||||
identifier="gemma2:2b",
|
||||
supports_tools=False,
|
||||
parameter_count_in_b=9
|
||||
),
|
||||
626: Model(
|
||||
display_name="Gemma2 27b",
|
||||
identifier="gemma2:2b",
|
||||
supports_tools=False,
|
||||
parameter_count_in_b=27
|
||||
),
|
||||
229: Model(
|
||||
display_name="Phi3 3.8b",
|
||||
identifier="phi3",
|
||||
supports_tools=False,
|
||||
parameter_count_in_b=3.8
|
||||
),
|
||||
903: Model(
|
||||
display_name="Tinyllama 1.1b",
|
||||
identifier="tinyllama:1.1b",
|
||||
supports_tools=False,
|
||||
parameter_count_in_b=1.1
|
||||
),
|
||||
670: Model(
|
||||
display_name="Mistral Nemo 12b",
|
||||
identifier="mistral-nemo:12b",
|
||||
supports_tools=True,
|
||||
parameter_count_in_b=12
|
||||
),
|
||||
404: Model(
|
||||
display_name="Command R+ 104b",
|
||||
identifier="command-r-plus:104b",
|
||||
supports_tools=True,
|
||||
parameter_count_in_b=104
|
||||
),
|
||||
701: Model(
|
||||
display_name="Yi 6b",
|
||||
identifier="yi:6b",
|
||||
supports_tools=False,
|
||||
parameter_count_in_b=6
|
||||
),
|
||||
# 245: Model(
|
||||
# display_name="llama3.1 8b",
|
||||
# identifier="llama3.1",
|
||||
# supports_tools=True,
|
||||
# parameter_count_in_b=8
|
||||
# ),
|
||||
# 238: Model(
|
||||
# display_name="llama3.1 70b",
|
||||
# identifier="llama3.1:70b",
|
||||
# supports_tools=True,
|
||||
# parameter_count_in_b=70
|
||||
# ),
|
||||
# 120: Model(
|
||||
# display_name="llama3 groq TU 8b",
|
||||
# identifier="llama3-groq-tool-use",
|
||||
# supports_tools=True,
|
||||
# parameter_count_in_b=8
|
||||
# ),
|
||||
# 890: Model(
|
||||
# display_name="llama3 groq TU 70b",
|
||||
# identifier="llama3-groq-tool-use:70b",
|
||||
# supports_tools=True,
|
||||
# parameter_count_in_b=70
|
||||
# ),
|
||||
# 348: Model(
|
||||
# display_name="Mixtral MoE 8x7b",
|
||||
# identifier="mixtral:8x7b",
|
||||
# supports_tools=False,
|
||||
# parameter_count_in_b=13,
|
||||
# ),
|
||||
# 789: Model(
|
||||
# display_name="Mixtral MoE 8x22b",
|
||||
# identifier="mixtral:8x22b",
|
||||
# supports_tools=True,
|
||||
# parameter_count_in_b=39
|
||||
# ),
|
||||
# 445: Model(
|
||||
# display_name="Gemma2 2b",
|
||||
# identifier="gemma2:2b",
|
||||
# supports_tools=False,
|
||||
# parameter_count_in_b=2
|
||||
# ),
|
||||
# 475: Model(
|
||||
# display_name="Gemma2 9b",
|
||||
# identifier="gemma2:2b",
|
||||
# supports_tools=False,
|
||||
# parameter_count_in_b=9
|
||||
# ),
|
||||
# 626: Model(
|
||||
# display_name="Gemma2 27b",
|
||||
# identifier="gemma2:2b",
|
||||
# supports_tools=False,
|
||||
# parameter_count_in_b=27
|
||||
# ),
|
||||
# 229: Model(
|
||||
# display_name="Phi3 3.8b",
|
||||
# identifier="phi3",
|
||||
# supports_tools=False,
|
||||
# parameter_count_in_b=3.8
|
||||
# ),
|
||||
# 903: Model(
|
||||
# display_name="Tinyllama 1.1b",
|
||||
# identifier="tinyllama:1.1b",
|
||||
# supports_tools=False,
|
||||
# parameter_count_in_b=1.1
|
||||
# ),
|
||||
# 670: Model(
|
||||
# display_name="Mistral Nemo 12b",
|
||||
# identifier="mistral-nemo:12b",
|
||||
# supports_tools=True,
|
||||
# parameter_count_in_b=12
|
||||
# ),
|
||||
# 404: Model(
|
||||
# display_name="Command R+ 104b",
|
||||
# identifier="command-r-plus:104b",
|
||||
# supports_tools=True,
|
||||
# parameter_count_in_b=104
|
||||
# ),
|
||||
# 701: Model(
|
||||
# display_name="Yi 6b",
|
||||
# identifier="yi:6b",
|
||||
# supports_tools=False,
|
||||
# parameter_count_in_b=6
|
||||
# ),
|
||||
704: Model(
|
||||
display_name="Yi 9b",
|
||||
identifier="yi:9b",
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
from libs.classes import Technique
|
||||
|
||||
techniques = {
|
||||
190: Technique(
|
||||
1: Technique(
|
||||
name="Native",
|
||||
for_supports_tools=True,
|
||||
for_not_supports_tools=False,
|
||||
),
|
||||
903: Technique(
|
||||
name="Long System Message",
|
||||
name="LSM", # Long System Message
|
||||
for_supports_tools=False,
|
||||
for_not_supports_tools=True,
|
||||
),
|
||||
572: Technique(
|
||||
name="T2S", # Tool to System Messsages
|
||||
for_supports_tools=False,
|
||||
for_not_supports_tools=True,
|
||||
),
|
||||
# 572: Technique(
|
||||
# name="Tool to System Messsages",
|
||||
# for_supports_tools=False,
|
||||
# for_not_supports_tools=True,
|
||||
# ),
|
||||
}
|
||||
|
||||
@@ -1,160 +1,150 @@
|
||||
from libs.classes import Test
|
||||
from libs.runnables import basic_prompt, one_tool_call_answer, agent_with_tools
|
||||
from libs.validators import regex_match_any, system_human_answer_match
|
||||
from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
|
||||
from textwrap import dedent
|
||||
from libs.classes import Test
|
||||
from libs.runnables import basic_prompt, one_tool_call_answer, agent_with_tools
|
||||
from libs.validators import regex_match_any, system_human_answer_match
|
||||
from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
|
||||
from textwrap import dedent
|
||||
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
|
||||
|
||||
tests = {
|
||||
607: Test(
|
||||
name="Healthy Vegetables in Chinese",
|
||||
runnable=basic_prompt,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant. You serve people across the globe.",
|
||||
"human_msg": "什么蔬菜最健康?",
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- in Mandarin Chinese from front to finnish
|
||||
- factually correct
|
||||
- about healthy vegetables
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
|
||||
|
||||
Again, the message has to be entirely in Manadarin Chineese.
|
||||
That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct"""),
|
||||
}
|
||||
),
|
||||
693: Test(
|
||||
name="Simple Multiplication",
|
||||
runnable=one_tool_call_answer,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant.",
|
||||
"human_msg": "What is 234215 times 143243?",
|
||||
"tools": {
|
||||
"add": add,
|
||||
"multiply": multiply
|
||||
}
|
||||
},
|
||||
validator=regex_match_any,
|
||||
validation_input={
|
||||
"patterns": ["33549659245", "33,549,659,245", "33.549.659.245"]
|
||||
# "patterns": ["3[,\. ]?3[,\. ]?5[,\. ]?4[,\. ]?9[,\. ]?6[,\. ]?5[,\. ]?9[,\. ]?2[,\. ]?4[,\. ]?5"] # Would accept 3.354.965.9245
|
||||
}
|
||||
),
|
||||
120: Test(
|
||||
name="Complex Multiplication",
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
"system_msg": 'You are a helpful assistant. You can use tools to accomplish the task. Once you\'ve called a tool. the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling.',
|
||||
"human_msg": "What is 235 times 1243 times 21?",
|
||||
"tools": {
|
||||
"add": add,
|
||||
"multiply": multiply
|
||||
}
|
||||
},
|
||||
validator=regex_match_any,
|
||||
validation_input={
|
||||
"patterns": [ "6134205", "6.134.205", "6,134,205" ]
|
||||
}
|
||||
),
|
||||
283: Test(
|
||||
name="Notes from last Saturday",
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
|
||||
"human_msg": "Last Saturday, who did grandma want me to call?",
|
||||
"tools": {
|
||||
"get_current_date_and_time": get_current_date_and_time,
|
||||
"get_notes_in_timespan": get_notes_in_timespan,
|
||||
"get_notes_containing": get_notes_containing,
|
||||
"Write note": write_note,
|
||||
}
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
}
|
||||
),
|
||||
260: Test(
|
||||
name="Notes from last Saturday TSO", # time span only
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
|
||||
"human_msg": "Last Saturday, who did grandma want me to call?",
|
||||
"tools": {
|
||||
"get_current_date_and_time": get_current_date_and_time,
|
||||
"get_notes_in_timespan": get_notes_in_timespan,
|
||||
"Write note": write_note
|
||||
}
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
}
|
||||
),
|
||||
856: Test(
|
||||
name="Notes from last Saturday TSO FSP",
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
|
||||
"fsp_messages": [
|
||||
HumanMessage("Tomorrow is the anniversary! Any tips what I should by her?"), # One year ago
|
||||
AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "11" }]),
|
||||
ToolMessage("Wednesday the 31st of Juli 2024 09:31", tool_call_id="11" ),
|
||||
AIMessage("", tool_calls=[{"name": "get_notes_in_timespan", "args": {"begin": "2023/08/01", "to": "2023/08/01"}, "id": "12"}], ),
|
||||
ToolMessage("2023/08/01 23:10 Went out with Charlotte for our anniversary. Pizza at Cavalinos. She loved the rose necklace!", tool_call_id="12"),
|
||||
AIMessage("I'm afraid I cannot be of great help, since I obviously know charlotte way less than you, but last year you two went out to Cavalinons and you got her a rose necklace as a present. And she liked it. So maybe a pair of earrings would be something she'd like?", name="example_assistant", ),
|
||||
|
||||
HumanMessage("Did I write down anything yesterday or the day before that?"),
|
||||
AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "21" }], ),
|
||||
ToolMessage("Wednesday the 7th of August 2024 16:23", tool_call_id="21" ),
|
||||
AIMessage("", tool_calls=[{"name": "get_notes_in_timespan", "args": {"begin": "2024/08/05", "to": "2024/08/06"}, "id": "22"}], ),
|
||||
ToolMessage("2024/08/05 11:45 Ask Dr. Mills about the side effects of the new medication he got me.\n\n2024/08/06 18:30 Pick up the dry cleaning on Thursday; they close early on Fridays.", tool_call_id="22"),
|
||||
AIMessage("Yes. I found two entries.\n- From yesterday stating that you wanted to pickup the dry cleaning on Thursday, because they close early on Fridays\n- From Monday a note saying that you want to ask Dr. Mills about the side effects of the new medication he got you.", name="example_assistant", ),
|
||||
],
|
||||
"human_msg": "Last Saturday, who did grandma want me to call?",
|
||||
"tools": {
|
||||
"get_current_date_and_time": get_current_date_and_time,
|
||||
"get_notes_in_timespan": get_notes_in_timespan,
|
||||
"Write note": write_note
|
||||
}
|
||||
},
|
||||
607: Test(
|
||||
name="Healthy Vegetables in Chinese",
|
||||
runnable=basic_prompt,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant. You serve people across the globe.",
|
||||
"human_msg": "什么蔬菜最健康?",
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
}
|
||||
),
|
||||
|
||||
# 363: Test(),
|
||||
# 600: Test(),
|
||||
# 221: Test(),
|
||||
# 985: Test(),
|
||||
# 634: Test(),
|
||||
# 927: Test(),
|
||||
# 346: Test(),
|
||||
# 995: Test(),
|
||||
# 404: Test(),
|
||||
# 299: Test(),
|
||||
# 275: Test(),
|
||||
# 852: Test(),
|
||||
# 376: Test(),
|
||||
# 263: Test(),
|
||||
# 432: Test(),
|
||||
# 270: Test(),
|
||||
# 797: Test(),
|
||||
# 340: Test(),
|
||||
# 489: Test(),
|
||||
# 786: Test(),
|
||||
# 121: Test(),
|
||||
# 971: Test(),
|
||||
# 436: Test(),
|
||||
# 147: Test(),
|
||||
# 534: Test(),
|
||||
# 190: Test(),
|
||||
# 158: Test(),
|
||||
# 191: Test(),
|
||||
}
|
||||
validation_input={
|
||||
"criteria": dedent("""- in Mandarin Chinese from front to finnish
|
||||
- factually correct
|
||||
- about healthy vegetables
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
|
||||
|
||||
Again, the message has to be entirely in Manadarin Chineese.
|
||||
That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct"""),
|
||||
},
|
||||
),
|
||||
693: Test(
|
||||
name="Simple Multiplication",
|
||||
runnable=one_tool_call_answer,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant.",
|
||||
"human_msg": "What is 234215 times 143243?",
|
||||
"tools": {"add": add, "multiply": multiply},
|
||||
},
|
||||
validator=regex_match_any,
|
||||
validation_input={
|
||||
"patterns": ["33549659245", "33,549,659,245", "33.549.659.245"]
|
||||
# "patterns": ["3[,\. ]?3[,\. ]?5[,\. ]?4[,\. ]?9[,\. ]?6[,\. ]?5[,\. ]?9[,\. ]?2[,\. ]?4[,\. ]?5"] # Would accept 3.354.965.9245
|
||||
},
|
||||
),
|
||||
120: Test(
|
||||
name="Complex Multiplication",
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
"system_msg": 'You are a helpful assistant. You can use tools to accomplish the task. Once you\'ve called a tool. the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling.',
|
||||
"human_msg": "What is 235 times 1243 times 21?",
|
||||
"tools": {"add": add, "multiply": multiply},
|
||||
},
|
||||
validator=regex_match_any,
|
||||
validation_input={"patterns": ["6134205", "6.134.205", "6,134,205"]},
|
||||
),
|
||||
283: Test(
|
||||
name="Notes from last Saturday",
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
"system_msg": 'You are a helpful assistant. You can use tools to accomplish tasks. Once you\'ve called a tool, the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling. To know the date, use the tool get_current_date_and_time.',
|
||||
"human_msg": "Last Saturday, who did grandma want me to call?",
|
||||
"tools": {
|
||||
"get_current_date_and_time": get_current_date_and_time,
|
||||
"get_notes_in_timespan": get_notes_in_timespan,
|
||||
"get_notes_containing": get_notes_containing,
|
||||
"Write note": write_note,
|
||||
},
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
},
|
||||
),
|
||||
260: Test(
|
||||
name="Notes from last Saturday TSO", # time span only
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
"system_msg": 'You are a helpful assistant. You can use tools to accomplish tasks. Once you\'ve called a tool, the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling. To know the date, use the tool get_current_date_and_time.',
|
||||
"human_msg": "Last Saturday, who did grandma want me to call?",
|
||||
"tools": {
|
||||
"get_current_date_and_time": get_current_date_and_time,
|
||||
"get_notes_in_timespan": get_notes_in_timespan,
|
||||
"Write note": write_note,
|
||||
},
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
},
|
||||
),
|
||||
856: Test(
|
||||
name="Notes from last Saturday TSO FSP",
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
"system_msg": 'You are a helpful assistant. You can use tools to accomplish tasks. Once you\'ve called a tool, the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling. To know the date, use the tool get_current_date_and_time.',
|
||||
"fsp_messages": [
|
||||
HumanMessage("Tomorrow is the anniversary! Any tips what I should by her?"), # One year ago
|
||||
AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "11"}]),
|
||||
ToolMessage("Wednesday the 31st of Juli 2024 09:31", tool_call_id="11"),
|
||||
AIMessage("", tool_calls=[ { "name": "get_notes_in_timespan", "args": {"begin": "2023/08/01", "to": "2023/08/01"}, "id": "12", } ]),
|
||||
ToolMessage("2023/08/01 23:10 Went out with Charlotte for our anniversary. Pizza at Cavalinos. She loved the rose necklace!", tool_call_id="12"),
|
||||
AIMessage("I'm afraid I cannot be of great help, since I obviously know charlotte way less than you, but last year you two went out to Cavalinons and you got her a rose necklace as a present. And she liked it. So maybe a pair of earrings would be something she'd like?", name="example_assistant"),
|
||||
|
||||
HumanMessage("Did I write down anything yesterday or the day before that?"),
|
||||
AIMessage( "", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "21"}]),
|
||||
ToolMessage("Wednesday the 7th of August 2024 16:23", tool_call_id="21"),
|
||||
AIMessage( "", tool_calls=[ { "name": "get_notes_in_timespan", "args": {"begin": "2024/08/05", "to": "2024/08/06"}, "id": "22"}]),
|
||||
ToolMessage( "2024/08/05 11:45 Ask Dr. Mills about the side effects of the new medication he got me.\n\n2024/08/06 18:30 Pick up the dry cleaning on Thursday; they close early on Fridays.", tool_call_id="22"),
|
||||
AIMessage( "Yes. I found two entries.\n- From yesterday stating that you wanted to pickup the dry cleaning on Thursday, because they close early on Fridays\n- From Monday a note saying that you want to ask Dr. Mills about the side effects of the new medication he got you.", name="example_assistant"),
|
||||
],
|
||||
"human_msg": "Last Saturday, who did grandma want me to call?",
|
||||
"tools": {
|
||||
"get_current_date_and_time": get_current_date_and_time,
|
||||
"get_notes_in_timespan": get_notes_in_timespan,
|
||||
"Write note": write_note,
|
||||
},
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
},
|
||||
),
|
||||
# 363: Test(),
|
||||
# 600: Test(),
|
||||
# 221: Test(),
|
||||
# 985: Test(),
|
||||
# 634: Test(),
|
||||
# 927: Test(),
|
||||
# 346: Test(),
|
||||
# 995: Test(),
|
||||
# 404: Test(),
|
||||
# 299: Test(),
|
||||
# 275: Test(),
|
||||
# 852: Test(),
|
||||
# 376: Test(),
|
||||
# 263: Test(),
|
||||
# 432: Test(),
|
||||
# 270: Test(),
|
||||
# 797: Test(),
|
||||
# 340: Test(),
|
||||
# 489: Test(),
|
||||
# 786: Test(),
|
||||
# 121: Test(),
|
||||
# 971: Test(),
|
||||
# 436: Test(),
|
||||
# 147: Test(),
|
||||
# 534: Test(),
|
||||
# 190: Test(),
|
||||
# 158: Test(),
|
||||
# 191: Test(),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user