restructuring
This commit is contained in:
159
suite_settings/tests.py
Normal file
159
suite_settings/tests.py
Normal file
@@ -0,0 +1,159 @@
|
||||
from libs.test_class import Test
|
||||
from libs.runnables import *
|
||||
from libs.validators import *
|
||||
from libs.tools import *
|
||||
from textwrap import dedent
|
||||
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
|
||||
|
||||
# Prompt pieces shared verbatim by the three "Notes from last Saturday"
# tests (283, 260, 856). Kept in one place so the variants differ only in
# the tool set / few-shot messages they are meant to compare.
_NOTES_SYSTEM_MSG = (
    "You are a helpful assistant. You can use tools to accomplish tasks. "
    "Once you've called a tool, the resulting tool_message content can be "
    "taken into consideration again. With that you can do \"multiple rounds\" "
    "of tool calling. To know the date, use the tool get_current_date_and_time."
)
_NOTES_HUMAN_MSG = "Last Saturday, who did grandma want me to call?"
# The opening `"""\` plus uniform indentation lets dedent() strip the margin;
# starting the text directly after the quotes would make dedent() a no-op.
_NOTES_CRITERIA = dedent("""\
    - containing the information that the Human should call Wolfgang
    - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting disclaimers or notes, what specific tool was used to get the answer, etc.)""")

# Test registry: numeric test ID -> Test definition.
#
# Each Test is built from:
#   name             - human-readable label
#   runnable         - callable from libs.runnables that produces the answer
#   runnable_input   - dict passed to the runnable (prompts, tools, ...)
#   validator        - callable from libs.validators that judges the answer
#   validation_input - dict passed to the validator (criteria or patterns)
#
# NOTE(review): the tool key "Write note" contains a space and does not follow
# the function-style names of the other tool keys. If the keys are used as the
# tool names exposed to the model, this may be rejected or never matched -
# confirm against the runnables before renaming.
tests = {
    607: Test(
        name="Healthy Vegetables in Chinese",
        runnable=basic,
        runnable_input={
            "system_msg": "You are a helpful assistant. You serve people across the globe.",
            "human_msg": "什么蔬菜最健康?",
        },
        validator=system_human_answer_match,
        validation_input={
            "criteria": dedent("""\
                - in Mandarin Chinese from start to finish
                - factually correct
                - about healthy vegetables
                - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting disclaimers or notes)

                Again, the message has to be entirely in Mandarin Chinese.
                That means if the answer is not in Chinese the answer is NOT correct! Only if the message is in Chinese rate as correct"""),
        },
    ),
    693: Test(
        name="Simple Multiplication",
        runnable=one_tool_call_answer,
        runnable_input={
            "system_msg": "You are a helpful assistant.",
            "human_msg": "What is 234215 times 143243?",
            "tools": {
                "add": add,
                "multiply": multiply,
            },
        },
        validator=regex_match_any,
        validation_input={
            # 234215 * 143243 = 33549659245, accepted plain or with "," / "."
            # as thousands separators. A single pattern with an optional
            # separator after every digit was rejected because it would also
            # accept wrongly grouped output such as 3.354.965.9245.
            # NOTE(review): if regex_match_any treats these as regular
            # expressions, the unescaped "." matches any character - confirm.
            "patterns": ["33549659245", "33,549,659,245", "33.549.659.245"],
        },
    ),
    120: Test(
        name="Complex Multiplication",
        runnable=agent_with_tools,
        runnable_input={
            "system_msg": (
                "You are a helpful assistant. You can use tools to accomplish the task. "
                "Once you've called a tool, the resulting tool_message content can be "
                "taken into consideration again. With that you can do \"multiple rounds\" "
                "of tool calling."
            ),
            "human_msg": "What is 235 times 1243 times 21?",
            "tools": {
                "add": add,
                "multiply": multiply,
            },
        },
        validator=regex_match_any,
        validation_input={
            # 235 * 1243 * 21 = 6134205; needs two chained multiply calls.
            "patterns": ["6134205", "6.134.205", "6,134,205"],
        },
    ),
    283: Test(
        name="Notes from last Saturday",
        runnable=agent_with_tools,
        runnable_input={
            "system_msg": _NOTES_SYSTEM_MSG,
            "human_msg": _NOTES_HUMAN_MSG,
            "tools": {
                "get_current_date_and_time": get_current_date_and_time,
                "get_notes_in_timespan": get_notes_in_timespan,
                "get_notes_containing": get_notes_containing,
                "Write note": write_note,
            },
        },
        validator=system_human_answer_match,
        validation_input={
            "criteria": _NOTES_CRITERIA,
        },
    ),
    260: Test(
        # TSO = time span only: same task as 283, but get_notes_containing
        # is not offered, leaving only the time-span lookup for reading notes.
        name="Notes from last Saturday TSO",
        runnable=agent_with_tools,
        runnable_input={
            "system_msg": _NOTES_SYSTEM_MSG,
            "human_msg": _NOTES_HUMAN_MSG,
            "tools": {
                "get_current_date_and_time": get_current_date_and_time,
                "get_notes_in_timespan": get_notes_in_timespan,
                "Write note": write_note,
            },
        },
        validator=system_human_answer_match,
        validation_input={
            "criteria": _NOTES_CRITERIA,
        },
    ),
    856: Test(
        # Same task and tool set as 260, plus example conversations passed as
        # "fsp_messages" (presumably few-shot prompting - confirm in
        # agent_with_tools_fsp).
        name="Notes from last Saturday TSO FSP",
        runnable=agent_with_tools_fsp,
        runnable_input={
            "system_msg": _NOTES_SYSTEM_MSG,
            "fsp_messages": [
                # Example 1: get the date, then read the note from the same
                # day one year earlier.
                HumanMessage("Tomorrow is the anniversary! Any tips what I should buy her?"),
                AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "11"}]),
                ToolMessage("Wednesday the 31st of July 2024 09:31", tool_call_id="11"),
                AIMessage("", tool_calls=[{"name": "get_notes_in_timespan", "args": {"begin": "2023/08/01", "to": "2023/08/01"}, "id": "12"}]),
                ToolMessage("2023/08/01 23:10 Went out with Charlotte for our anniversary. Pizza at Cavalinos. She loved the rose necklace!", tool_call_id="12"),
                AIMessage("I'm afraid I cannot be of great help, since I obviously know Charlotte way less than you, but last year you two went out to Cavalinos and you got her a rose necklace as a present. And she liked it. So maybe a pair of earrings would be something she'd like?", name="example_assistant"),

                # Example 2: get the date, then read a two-day span relative
                # to it ("yesterday or the day before").
                HumanMessage("Did I write down anything yesterday or the day before that?"),
                AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "21"}]),
                ToolMessage("Wednesday the 7th of August 2024 16:23", tool_call_id="21"),
                AIMessage("", tool_calls=[{"name": "get_notes_in_timespan", "args": {"begin": "2024/08/05", "to": "2024/08/06"}, "id": "22"}]),
                ToolMessage("2024/08/05 11:45 Ask Dr. Mills about the side effects of the new medication he got me.\n\n2024/08/06 18:30 Pick up the dry cleaning on Thursday; they close early on Fridays.", tool_call_id="22"),
                AIMessage("Yes. I found two entries.\n- From yesterday stating that you wanted to pick up the dry cleaning on Thursday, because they close early on Fridays\n- From Monday a note saying that you want to ask Dr. Mills about the side effects of the new medication he got you.", name="example_assistant"),
            ],
            "human_msg": _NOTES_HUMAN_MSG,
            "tools": {
                "get_current_date_and_time": get_current_date_and_time,
                "get_notes_in_timespan": get_notes_in_timespan,
                "Write note": write_note,
            },
        },
        validator=system_human_answer_match,
        validation_input={
            "criteria": _NOTES_CRITERIA,
        },
    ),
    # Unused IDs, kept as commented-out placeholders (presumably reserved for
    # future tests).
    # 363: Test(),
    # 600: Test(),
    # 221: Test(),
    # 985: Test(),
    # 634: Test(),
    # 927: Test(),
    # 346: Test(),
    # 995: Test(),
    # 404: Test(),
    # 299: Test(),
    # 275: Test(),
    # 852: Test(),
    # 376: Test(),
    # 263: Test(),
    # 432: Test(),
    # 270: Test(),
    # 797: Test(),
    # 340: Test(),
    # 489: Test(),
    # 786: Test(),
    # 121: Test(),
    # 971: Test(),
    # 436: Test(),
    # 147: Test(),
    # 534: Test(),
    # 190: Test(),
    # 158: Test(),
    # 191: Test(),
}
|
||||
|
||||
Reference in New Issue
Block a user