restructuring

2024-08-14 21:02:12 +02:00
parent 15973d723f
commit 298d8c83ef
5 changed files with 205 additions and 75 deletions
--- a/suite_settings/tests.py
+++ b/suite_settings/tests.py
@@ -0,0 +1,159 @@
+from libs.test_class import Test
+from libs.runnables  import * 
+from libs.validators import * 
+from libs.tools      import *
+from textwrap        import dedent
+from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
+
+tests = {
+	607: Test(
+	    name="Healthy Vegetables in Chinese",
+	    runnable=basic,
+	    runnable_input={
+	        "system_msg": "You are a helpful assistant. You serve people across the globe.",
+	        "human_msg": "什么蔬菜最健康？",
+	    },
+        validator=system_human_answer_match,
+	    validation_input={
+    	    "criteria": dedent("""- in Mandarin Chinese from front to finnish
+			- factually correct
+			- about healthy vegetables
+			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
+
+			Again, the message has to be entirely in Manadarin Chineese.
+			That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct"""),
+	    }
+    ),
+    693: Test(
+	    name="Simple Multiplication",
+	    runnable=one_tool_call_answer,
+	    runnable_input={
+	        "system_msg": "You are a helpful assistant.",
+	        "human_msg": "What is 234215 times 143243?",
+	        "tools": {
+	            "add": add,
+	            "multiply": multiply
+	        }
+	    },
+        validator=regex_match_any,
+	    validation_input={
+	        "patterns": ["33549659245", "33,549,659,245", "33.549.659.245"]
+	        # "patterns": ["3[,\. ]?3[,\. ]?5[,\. ]?4[,\. ]?9[,\. ]?6[,\. ]?5[,\. ]?9[,\. ]?2[,\. ]?4[,\. ]?5"] # Would accept 3.354.965.9245
+	    }
+    ),
+	120: Test(
+	    name="Complex Multiplication",
+	    runnable=agent_with_tools,
+	    runnable_input={
+	        "system_msg": 'You are a helpful assistant. You can use tools to accomplish the task. Once you\'ve called a tool. the resulting tool_message content can be taken into consideration again. With that you can do "multiple rounds" of tool calling.',
+	        "human_msg": "What is 235 times 1243 times 21?",
+	        "tools": {
+	            "add": add,
+	            "multiply": multiply
+	        }
+	    },
+        validator=regex_match_any,
+	    validation_input={
+	        "patterns": [ "6134205", "6.134.205", "6,134,205" ]
+	    }
+    ),
+    283: Test(
+	    name="Notes from last Saturday",
+	    runnable=agent_with_tools,
+	    runnable_input={
+	        "system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
+	        "human_msg": "Last Saturday, who did grandma want me to call?",
+	        "tools": {
+	            "get_current_date_and_time": get_current_date_and_time,
+	            "get_notes_in_timespan": get_notes_in_timespan,
+	            "get_notes_containing": get_notes_containing,
+	            "Write note": write_note
+	        }
+	    },
+        validator=system_human_answer_match,
+	    validation_input={
+    	    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
+			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
+	    }
+    ),
+    260: Test(
+	    name="Notes from last Saturday TSO", # time span only
+	    runnable=agent_with_tools,
+	    runnable_input={
+	        "system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
+	        "human_msg": "Last Saturday, who did grandma want me to call?",
+	        "tools": {
+	            "get_current_date_and_time": get_current_date_and_time,
+	            "get_notes_in_timespan": get_notes_in_timespan,
+	            "Write note": write_note
+	        }
+	    },
+        validator=system_human_answer_match,
+	    validation_input={
+    	    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
+			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
+	    }
+    ),
+	856: Test(
+	    name="Notes from last Saturday TSO FSP", 
+	    runnable=agent_with_tools_fsp,
+	    runnable_input={
+		    "system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
+	        "fsp_messages": [
+				HumanMessage("Tomorrow is the anniversary! Any tips what I should by her?"), # One year ago
+			    AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "11" }]),
+			    ToolMessage("Wednesday the 31st of Juli 2024 09:31", tool_call_id="11" ),
+			    AIMessage("", tool_calls=[{"name": "get_notes_in_timespan", "args": {"begin": "2023/08/01", "to": "2023/08/01"}, "id": "12"}], ),
+			    ToolMessage("2023/08/01 23:10 Went out with Charlotte for our anniversary. Pizza at Cavalinos. She loved the rose necklace!", tool_call_id="12"),
+			    AIMessage("I'm afraid I cannot be of great help, since I obviously know charlotte way less than you, but last year you two went out to Cavalinons and you got her a rose necklace as a present. And she liked it. So maybe a pair of earrings would be something she'd like?", name="example_assistant", ),
+
+				HumanMessage("Did I write down anything yesterday or the day before that?"),
+			    AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "21" }], ),
+			    ToolMessage("Wednesday the 7th of August 2024 16:23", tool_call_id="21" ),
+			    AIMessage("", tool_calls=[{"name": "get_notes_in_timespan", "args": {"begin": "2024/08/05", "to": "2024/08/06"}, "id": "22"}], ),
+			    ToolMessage("2024/08/05 11:45 Ask Dr. Mills about the side effects of the new medication he got me.\n\n2024/08/06 18:30 Pick up the dry cleaning on Thursday; they close early on Fridays.", tool_call_id="22"),
+			    AIMessage("Yes. I found two entries.\n- From yesterday stating that you wanted to pickup the dry cleaning on Thursday, because they close early on Fridays\n- From Monday a note saying that you want to ask Dr. Mills about the side effects of the new medication he got you.", name="example_assistant", ),
+	        ],
+			"human_msg": "Last Saturday, who did grandma want me to call?",
+	        "tools": {
+	            "get_current_date_and_time": get_current_date_and_time,
+	            "get_notes_in_timespan": get_notes_in_timespan,
+	            "Write note": write_note
+	        }
+	    },
+        validator=system_human_answer_match,
+	    validation_input={
+    	    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
+			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
+	    }
+    ),
+	# 363: Test(),
+	# 600: Test(),
+	# 221: Test(),
+	# 985: Test(),
+	# 634: Test(),
+	# 927: Test(),
+	# 346: Test(),
+	# 995: Test(),
+	# 404: Test(),
+	# 299: Test(),
+	# 275: Test(),
+	# 852: Test(),
+	# 376: Test(),
+	# 263: Test(),
+	# 432: Test(),
+	# 270: Test(),
+	# 797: Test(),
+	# 340: Test(),
+	# 489: Test(),
+	# 786: Test(),
+	# 121: Test(),
+	# 971: Test(),
+	# 436: Test(),
+	# 147: Test(),
+	# 534: Test(),
+	# 190: Test(),
+	# 158: Test(),
+	# 191: Test(),
+}
+