"""Registry of LLM test cases, keyed by numeric test ID.

Each entry of ``tests`` is a ``Test`` built from a display name, a runnable
with its input, and a validator with its input.
"""
from textwrap import dedent

from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage

from libs.classes import Test
from libs.runnables import agent_with_tools, basic_prompt, one_tool_call_answer
from libs.tools import (
    add,
    get_current_date_and_time,
    get_notes_containing,
    get_notes_in_timespan,
    multiply,
    save_python_repl,
    write_note,
)
from libs.validators import regex_match_any, system_human_answer_match

# System prompt shared by all "Notes from last Saturday" variants.
_NOTES_SYSTEM_MSG = (
    'You are a helpful assistant. You can use tools to accomplish tasks. '
    'Once you\'ve called a tool, the resulting tool_message content can be '
    'taken into consideration again. With that you can do "multiple rounds" '
    'of tool calling. To know the date, use the tool '
    'get_current_date_and_time.'
)

# Validation criteria shared by all "Notes from last Saturday" variants.
# The leading backslash puts the first bullet on an indented line like the
# others; without it the first line has no margin and dedent() strips nothing.
_CALL_WOLFGANG_CRITERIA = dedent("""\
    - containing the information that the Human should call Wolfgang
    - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting disclaimers or notes, what specific tool was used to get the answer, etc.)""")

tests = {
    # Disabled test, kept for reference.
    # 607: Test(
    #     name="Healthy Vegetables in Chinese",
    #     runnable=basic_prompt,
    #     runnable_input={
    #         "system_msg": "You are a helpful assistant. You serve people across the globe.",
    #         "human_msg": "什么蔬菜最健康?",
    #     },
    #     validator=system_human_answer_match,
    #     validation_input={
    #         "criteria": dedent("""\
    #             - in Mandarin Chinese from start to finish
    #             - factually correct
    #             - about healthy vegetables
    #             - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting disclaimers or notes)
    #             Again, the message has to be entirely in Mandarin Chinese.
    #             That means if the answer is not in Chinese the answer is NOT correct! Only if the message is in Chinese rate as correct"""),
    #     },
    # ),
    693: Test(
        name="Simple Multiplication",
        runnable=one_tool_call_answer,
        runnable_input={
            "system_msg": "You are a helpful assistant.",
            "human_msg": "What is 234215 times 143243?",
            "tools": {"add": add, "multiply": multiply},
        },
        validator=regex_match_any,
        # 234215 * 143243 = 33549659245; "," or "_" may separate the groups.
        validation_input={"patterns": [r"33[,_]?549[,_]?659[,_]?245"]},
    ),
    120: Test(
        name="Complex Multiplication",
        runnable=agent_with_tools,
        runnable_input={
            # NOTE(review): "a tool. the resulting" looks like a typo for
            # "a tool, the resulting" (compare _NOTES_SYSTEM_MSG); left
            # unchanged so the prompt under test stays stable.
            "system_msg": (
                'You are a helpful assistant. You can use tools to accomplish '
                'the task. Once you\'ve called a tool. the resulting '
                'tool_message content can be taken into consideration again. '
                'With that you can do "multiple rounds" of tool calling.'
            ),
            "human_msg": "What is 235 times 1243 times 21?",
            "tools": {"add": add, "multiply": multiply},
        },
        validator=regex_match_any,
        # 235 * 1243 * 21 = 6134205
        validation_input={"patterns": [r"6134205", r"6,134,205"]},
    ),
    363: Test(
        name="Python Remainder",
        runnable=one_tool_call_answer,
        runnable_input={
            "system_msg": 'You are a helpful assistant.',
            "human_msg": 'Is 31515261 divisible by 425? If not, whats the remainder?',
            "tools": {"save_python_repl": save_python_repl},
        },
        validator=regex_match_any,
        # 31515261 % 425 == 236
        validation_input={
            "patterns": [
                r"236",
                r"two ?hundred and thirty ?six",
                r"two ?hundred thirty ?six",
            ],
        },
    ),
    283: Test(
        name="Notes from last Saturday",
        runnable=agent_with_tools,
        runnable_input={
            "system_msg": _NOTES_SYSTEM_MSG,
            "human_msg": "Last Saturday, who did grandma want me to call?",
            "tools": {
                "get_current_date_and_time": get_current_date_and_time,
                "get_notes_in_timespan": get_notes_in_timespan,
                "get_notes_containing": get_notes_containing,
                # NOTE(review): every other key equals the tool's identifier;
                # "Write note" (here and in tests 260 and 856) does not.
                # Confirm how the runnable uses these keys before renaming.
                "Write note": write_note,
            },
        },
        validator=system_human_answer_match,
        validation_input={"criteria": _CALL_WOLFGANG_CRITERIA},
    ),
    260: Test(
        name="Notes from last Saturday TSO",  # time span only
        runnable=agent_with_tools,
        runnable_input={
            "system_msg": _NOTES_SYSTEM_MSG,
            "human_msg": "Last Saturday, who did grandma want me to call?",
            "tools": {
                "get_current_date_and_time": get_current_date_and_time,
                "get_notes_in_timespan": get_notes_in_timespan,
                "Write note": write_note,
            },
        },
        validator=system_human_answer_match,
        validation_input={"criteria": _CALL_WOLFGANG_CRITERIA},
    ),
    856: Test(
        # TSO: time span only; FSP presumably "few-shot prompt" (see
        # "fsp_messages" below) - TODO confirm.
        name="Notes from last Saturday TSO FSP",
        runnable=agent_with_tools,
        runnable_input={
            "system_msg": _NOTES_SYSTEM_MSG,
            # Two worked example conversations: look up the current date,
            # query the notes for a time span, then answer.
            "fsp_messages": [
                HumanMessage("Tomorrow is the anniversary! Any tips what I should by her?"),  # One year ago
                AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "11"}]),
                ToolMessage("Wednesday the 31st of Juli 2024 09:31", tool_call_id="11"),
                AIMessage(
                    "",
                    tool_calls=[
                        {
                            "name": "get_notes_in_timespan",
                            "args": {"begin": "2023/08/01", "to": "2023/08/01"},
                            "id": "12",
                        }
                    ],
                ),
                ToolMessage(
                    "2023/08/01 23:10 Went out with Charlotte for our anniversary. "
                    "Pizza at Cavalinos. She loved the rose necklace!",
                    tool_call_id="12",
                ),
                AIMessage(
                    "I'm afraid I cannot be of great help, since I obviously know "
                    "charlotte way less than you, but last year you two went out to "
                    "Cavalinons and you got her a rose necklace as a present. And she "
                    "liked it. So maybe a pair of earrings would be something she'd "
                    "like?",
                    name="example_assistant",
                ),
                HumanMessage("Did I write down anything yesterday or the day before that?"),
                AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "21"}]),
                ToolMessage("Wednesday the 7th of August 2024 16:23", tool_call_id="21"),
                AIMessage(
                    "",
                    tool_calls=[
                        {
                            "name": "get_notes_in_timespan",
                            "args": {"begin": "2024/08/05", "to": "2024/08/06"},
                            "id": "22",
                        }
                    ],
                ),
                ToolMessage(
                    "2024/08/05 11:45 Ask Dr. Mills about the side effects of the "
                    "new medication he got me.\n\n2024/08/06 18:30 Pick up the dry "
                    "cleaning on Thursday; they close early on Fridays.",
                    tool_call_id="22",
                ),
                AIMessage(
                    "Yes. I found two entries.\n- From yesterday stating that you "
                    "wanted to pickup the dry cleaning on Thursday, because they "
                    "close early on Fridays\n- From Monday a note saying that you "
                    "want to ask Dr. Mills about the side effects of the new "
                    "medication he got you.",
                    name="example_assistant",
                ),
            ],
            "human_msg": "Last Saturday, who did grandma want me to call?",
            "tools": {
                "get_current_date_and_time": get_current_date_and_time,
                "get_notes_in_timespan": get_notes_in_timespan,
                "Write note": write_note,
            },
        },
        validator=system_human_answer_match,
        validation_input={"criteria": _CALL_WOLFGANG_CRITERIA},
    ),
    # Unused placeholder entries (presumably IDs set aside for future tests).
    # 600: Test(),
    # 221: Test(),
    # 985: Test(),
    # 634: Test(),
    # 927: Test(),
    # 346: Test(),
    # 995: Test(),
    # 404: Test(),
    # 299: Test(),
    # 275: Test(),
    # 852: Test(),
    # 376: Test(),
    # 263: Test(),
    # 432: Test(),
    # 270: Test(),
    # 797: Test(),
    # 340: Test(),
    # 489: Test(),
    # 786: Test(),
    # 121: Test(),
    # 971: Test(),
    # 436: Test(),
    # 147: Test(),
    # 534: Test(),
    # 190: Test(),
    # 158: Test(),
    # 191: Test(),
}