This commit is contained in:
Lennart J. Kurzweg (Nx2)
2024-09-27 21:34:21 +02:00
parent 3c2429ce78
commit 76b2b436be
8 changed files with 297 additions and 133 deletions

View File

@@ -33,10 +33,7 @@ tests = {
"tools": {"add": add, "multiply": multiply},
},
validator=regex_match_any,
validation_input={
"patterns": ["33549659245", "33,549,659,245", "33.549.659.245"]
# "patterns": ["3[,\. ]?3[,\. ]?5[,\. ]?4[,\. ]?9[,\. ]?6[,\. ]?5[,\. ]?9[,\. ]?2[,\. ]?4[,\. ]?5"] # Would accept 3.354.965.9245
},
validation_input={"patterns": ["33549659245", "33,549,659,245", "33.549.659.245"]},
),
120: Test(
name="Complex Multiplication",
@@ -47,10 +44,10 @@ tests = {
"tools": {"add": add, "multiply": multiply},
},
validator=regex_match_any,
validation_input={"patterns": ["6134205", "6.134.205", "6,134,205"]},
validation_input={"patterns": ["6134205", "6,134,205"]},
),
363: Test(
name="Complex Multiplication Python",
name="Python Remainder",
runnable=one_tool_call_answer,
runnable_input={
"system_msg": 'You are a helpful assistant.',
@@ -58,9 +55,7 @@ tests = {
"tools": { "save_python_repl": save_python_repl },
},
validator=regex_match_any,
validation_input={
"patterns": [ "236", "two ?hundred and thirty ?six", "two ?hundred thirty ?six" ]
}
validation_input={"patterns": [ "236", "two ?hundred and thirty ?six", "two ?hundred thirty ?six" ]}
),
283: Test(
name="Notes from last Saturday",
@@ -96,7 +91,7 @@ tests = {
validator=system_human_answer_match,
validation_input={
"criteria": dedent("""- containing the information that the Human should call Wolfgang
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
},
),
856: Test(
@@ -113,11 +108,11 @@ tests = {
AIMessage("I'm afraid I cannot be of great help, since I obviously know charlotte way less than you, but last year you two went out to Cavalinons and you got her a rose necklace as a present. And she liked it. So maybe a pair of earrings would be something she'd like?", name="example_assistant"),
HumanMessage("Did I write down anything yesterday or the day before that?"),
AIMessage( "", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "21"}]),
AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "21"}]),
ToolMessage("Wednesday the 7th of August 2024 16:23", tool_call_id="21"),
AIMessage( "", tool_calls=[ { "name": "get_notes_in_timespan", "args": {"begin": "2024/08/05", "to": "2024/08/06"}, "id": "22"}]),
AIMessage("", tool_calls=[ { "name": "get_notes_in_timespan", "args": {"begin": "2024/08/05", "to": "2024/08/06"}, "id": "22"}]),
ToolMessage( "2024/08/05 11:45 Ask Dr. Mills about the side effects of the new medication he got me.\n\n2024/08/06 18:30 Pick up the dry cleaning on Thursday; they close early on Fridays.", tool_call_id="22"),
AIMessage( "Yes. I found two entries.\n- From yesterday stating that you wanted to pickup the dry cleaning on Thursday, because they close early on Fridays\n- From Monday a note saying that you want to ask Dr. Mills about the side effects of the new medication he got you.", name="example_assistant"),
AIMessage("Yes. I found two entries.\n- From yesterday stating that you wanted to pickup the dry cleaning on Thursday, because they close early on Fridays\n- From Monday a note saying that you want to ask Dr. Mills about the side effects of the new medication he got you.", name="example_assistant"),
],
"human_msg": "Last Saturday, who did grandma want me to call?",
"tools": {