From 298d8c83efb4f55fb29c0ee71a3d8fe9e346415e Mon Sep 17 00:00:00 2001
From: "Lennart J. Kurzweg (Nx2)" <git@nx2.site>
Date: Wed, 14 Aug 2024 21:02:12 +0200
Subject: [PATCH] restructuring

---
 suite_settings/models.py |  13 ++++
 suite_settings/seeds.py  |  17 +++++
 suite_settings/tests.py  | 159 +++++++++++++++++++++++++++++++++++++++
 test_small_llms.py       |  75 ------------------
 test_suite.py            |  16 ++++
 5 files changed, 205 insertions(+), 75 deletions(-)
 create mode 100644 suite_settings/models.py
 create mode 100644 suite_settings/seeds.py
 create mode 100644 suite_settings/tests.py
 delete mode 100644 test_small_llms.py
 create mode 100644 test_suite.py

diff --git a/suite_settings/models.py b/suite_settings/models.py
new file mode 100644
index 0000000..4b3181d
--- /dev/null
+++ b/suite_settings/models.py
@@ -0,0 +1,13 @@
+models = [  # model names that test_suite.py passes to run_tests(models=...); commented-out entries are not run
+    "llama3.1", # 8b
+    "llama3.1:70b",
+    "llama3-groq-tool-use", # latest
+    "llama3-groq-tool-use:70b",
+    # "mixtral:8x7b",
+    "mixtral:8x22b",
+    # "gemma2:2b",
+    # "phi3", # 3.8b
+    # "tinyllama:1.1b",
+    "mistral-nemo:12b",
+    "command-r-plus:104b",
+]
diff --git a/suite_settings/seeds.py b/suite_settings/seeds.py
new file mode 100644
index 0000000..5d5734e
--- /dev/null
+++ b/suite_settings/seeds.py
@@ -0,0 +1,17 @@
+seeds = [  # seed values that test_suite.py passes to run_tests(seeds=...)
+    12345678,
+    123456,
+    2,
+    222,
+    22222,
+    2222222,
+    31415926,
+    404,
+    62951413,
+    8008,
+    99999999,
+    789654,
+    10293847,
+    42,
+    911
+]
diff --git a/suite_settings/tests.py b/suite_settings/tests.py
new file mode 100644
index 0000000..c004ae2
--- /dev/null
+++ b/suite_settings/tests.py
@@ -0,0 +1,159 @@
+from libs.test_class import Test
+from libs.runnables  import * 
+from libs.validators import * 
+from libs.tools      import *
+from textwrap        import dedent
+from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
+
+tests = {  # integer key -> Test; test_suite.py imports this dict and passes it to run_tests(tests=...)
+    607: Test(
+        name="Healthy Vegetables in Chinese",
+        runnable=basic,
+        runnable_input={
+            "system_msg": "You are a helpful assistant. You serve people across the globe.",
+            "human_msg": "什么蔬菜最健康？",
+        },
+        validator=system_human_answer_match,
+        # The text starts after `"""\` so every line shares one margin; with an unindented first line dedent() strips nothing.
+        validation_input={"criteria": dedent("""\
+            - in Mandarin Chinese from front to finnish
+            - factually correct
+            - about healthy vegetables
+            - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
+
+            Again, the message has to be entirely in Manadarin Chineese.
+            That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct""")},
+    ),
+    693: Test(
+        name="Simple Multiplication",
+        runnable=one_tool_call_answer,
+        runnable_input={
+            "system_msg": "You are a helpful assistant.",
+            "human_msg": "What is 234215 times 143243?",
+            "tools": {"add": add, "multiply": multiply},
+        },
+        validator=regex_match_any,
+        validation_input={
+            # 234215 * 143243 = 33549659245; the plain, ","-grouped and "."-grouped spellings are listed.
+            # One separator-tolerant regex was avoided: it would accept 3.354.965.9245 as well.
+            "patterns": [
+                "33549659245", "33,549,659,245", "33.549.659.245",
+            ],
+        },
+    ),
+    120: Test(
+        name="Complex Multiplication",
+        runnable=agent_with_tools,
+        runnable_input={
+            "system_msg": "You are a helpful assistant. You can use tools to accomplish the task. Once you've called a tool. the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling.",
+            "human_msg": "What is 235 times 1243 times 21?",
+            "tools": {"add": add, "multiply": multiply},
+        },
+        validator=regex_match_any,
+        validation_input={
+            # 235 * 1243 * 21 = 6134205; plain and thousands-separated spellings are accepted.
+            "patterns": [
+                "6134205", "6.134.205", "6,134,205",
+            ],
+        },
+    ),
+    283: Test(
+        name="Notes from last Saturday",
+        runnable=agent_with_tools,
+        runnable_input={
+            "system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
+            "human_msg": "Last Saturday, who did grandma want me to call?",
+            "tools": {
+                "get_current_date_and_time": get_current_date_and_time,
+                "get_notes_in_timespan": get_notes_in_timespan,
+                "get_notes_containing": get_notes_containing,
+                "Write note": write_note,  # NOTE(review): key is not snake_case like its siblings — confirm this is intended
+            },
+        },
+        validator=system_human_answer_match,
+        # The text starts after `"""\` so every line shares one margin; with an unindented first line dedent() strips nothing.
+        validation_input={"criteria": dedent("""\
+            - containing the information that the Human should call Wolfgang 
+            - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")},
+    ),
+    260: Test(
+        name="Notes from last Saturday TSO",  # TSO = time span only: get_notes_containing is not offered
+        runnable=agent_with_tools,
+        runnable_input={
+            "system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
+            "human_msg": "Last Saturday, who did grandma want me to call?",
+            "tools": {
+                "get_current_date_and_time": get_current_date_and_time,
+                "get_notes_in_timespan": get_notes_in_timespan,
+                "Write note": write_note,  # NOTE(review): key is not snake_case like its siblings — confirm this is intended
+            },
+        },
+        validator=system_human_answer_match,
+        # The text starts after `"""\` so every line shares one margin; with an unindented first line dedent() strips nothing.
+        validation_input={"criteria": dedent("""\
+            - containing the information that the Human should call Wolfgang 
+            - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")},
+    ),
+    856: Test(
+        name="Notes from last Saturday TSO FSP",  # TSO = time span only; FSP presumably "few-shot prompting" (see fsp_messages) — confirm
+        runnable=agent_with_tools_fsp,
+        runnable_input={
+            "system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
+            "fsp_messages": [  # two example exchanges: date lookup, then a get_notes_in_timespan call, then the answer
+                HumanMessage("Tomorrow is the anniversary! Any tips what I should by her?"),  # One year ago
+                AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "11"}]),
+                ToolMessage("Wednesday the 31st of Juli 2024 09:31", tool_call_id="11"),
+                AIMessage("", tool_calls=[{"name": "get_notes_in_timespan", "args": {"begin": "2023/08/01", "to": "2023/08/01"}, "id": "12"}]),
+                ToolMessage("2023/08/01 23:10 Went out with Charlotte for our anniversary. Pizza at Cavalinos. She loved the rose necklace!", tool_call_id="12"),
+                AIMessage("I'm afraid I cannot be of great help, since I obviously know charlotte way less than you, but last year you two went out to Cavalinons and you got her a rose necklace as a present. And she liked it. So maybe a pair of earrings would be something she'd like?", name="example_assistant"),
+
+                HumanMessage("Did I write down anything yesterday or the day before that?"),
+                AIMessage("", tool_calls=[{"name": "get_current_date_and_time", "args": {}, "id": "21"}]),
+                ToolMessage("Wednesday the 7th of August 2024 16:23", tool_call_id="21"),
+                AIMessage("", tool_calls=[{"name": "get_notes_in_timespan", "args": {"begin": "2024/08/05", "to": "2024/08/06"}, "id": "22"}]),
+                ToolMessage("2024/08/05 11:45 Ask Dr. Mills about the side effects of the new medication he got me.\n\n2024/08/06 18:30 Pick up the dry cleaning on Thursday; they close early on Fridays.", tool_call_id="22"),
+                AIMessage("Yes. I found two entries.\n- From yesterday stating that you wanted to pickup the dry cleaning on Thursday, because they close early on Fridays\n- From Monday a note saying that you want to ask Dr. Mills about the side effects of the new medication he got you.", name="example_assistant"),
+            ],
+            "human_msg": "Last Saturday, who did grandma want me to call?",
+            "tools": {
+                "get_current_date_and_time": get_current_date_and_time,
+                "get_notes_in_timespan": get_notes_in_timespan,
+                "Write note": write_note,  # NOTE(review): key is not snake_case like its siblings — confirm this is intended
+            },
+        },
+        validator=system_human_answer_match,
+        # The text starts after `"""\` so every line shares one margin; with an unindented first line dedent() strips nothing.
+        validation_input={"criteria": dedent("""\
+            - containing the information that the Human should call Wolfgang 
+            - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")},
+    ),
+	# 363: Test(),
+	# 600: Test(),
+	# 221: Test(),
+	# 985: Test(),
+	# 634: Test(),
+	# 927: Test(),
+	# 346: Test(),
+	# 995: Test(),
+	# 404: Test(),
+	# 299: Test(),
+	# 275: Test(),
+	# 852: Test(),
+	# 376: Test(),
+	# 263: Test(),
+	# 432: Test(),
+	# 270: Test(),
+	# 797: Test(),
+	# 340: Test(),
+	# 489: Test(),
+	# 786: Test(),
+	# 121: Test(),
+	# 971: Test(),
+	# 436: Test(),
+	# 147: Test(),
+	# 534: Test(),
+	# 190: Test(),
+	# 158: Test(),
+	# 191: Test(),
+}
+
diff --git a/test_small_llms.py b/test_small_llms.py
deleted file mode 100644
index 1d1af44..0000000
--- a/test_small_llms.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from libs.test_class import Test
-from libs.run_tests import run_tests
-from libs.runnables import * 
-from libs.validators import * 
-from libs.tools import *
-
-from pprint import pprint
-
-def main():
-    models = [
-        "llama3.1", # 8b
-        "llama3.1:70b",
-        "llama3-groq-tool-use", # latest
-        "llama3-groq-tool-use:70b",
-        # "mixtral:8x7b",
-        "mixtral:8x22b",
-        # "gemma2:2b",
-        # "phi3", # 3.8b
-        # "tinyllama:1.1b",
-        "mistral-nemo:12b",
-        # "command-r-plus:104b",
-    ]
-    seeds = [
-        2,
-        222,
-        22222,
-        2222222
-    ]
-    tests = [
-        Test(
-    	    name="Chinese Fruit",
-    	    runnable=basic,
-    	    runnable_input={
-    	        "system_msg": "You are a helpful assistant. You serve people across the globe.",
-    	        "human_msg": "什么蔬菜最健康？",
-    	    },
-            validator=system_human_answer_match,
-    	    validation_input={
-        	    "criteria": """- in Mandarin Chinese from front to finnish
-- factually correct
-- about healthy vegetables
-- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
-
-Again, the message has to be entirely in Manadarin Chineese.
-That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct""",
-    	    }
-        ),
-        Test(
-    	    name="Simple Multiplication",
-    	    runnable=one_tool_call_answer,
-    	    runnable_input={
-    	        "system_msg": "You are a helpful assistant.",
-    	        "human_msg": "What is 234215 times 143243?",
-    	        "tools": {
-    	            "add": add,
-    	            "multiply": multiply
-    	        }
-    	    },
-            validator=regex_match_any,
-    	    validation_input={
-    	        "patterns": ["33549659245", "33,549,659,245", "33.549.659.245"]
-    	        # "patterns": ["3[,\. ]?3[,\. ]?5[,\. ]?4[,\. ]?9[,\. ]?6[,\. ]?5[,\. ]?9[,\. ]?2[,\. ]?4[,\. ]?5"] # Would accept 3.354.965.9245
-    	    }
-        ),
-    ]
-
-    results = run_tests(
-        models=models,
-        seeds=seeds,
-        tests=tests,
-        base_url="http://bolt.hs-mittweida.de:11434"
-    )
-
-if __name__ == "__main__":
-    main()
diff --git a/test_suite.py b/test_suite.py
new file mode 100644
index 0000000..aa91b29
--- /dev/null
+++ b/test_suite.py
@@ -0,0 +1,16 @@
+from libs.run_tests        import run_tests
+from suite_settings.models import models
+from suite_settings.seeds  import seeds
+from suite_settings.tests  import tests
+
+def main(base_url="http://bolt.hs-mittweida.de:11434"):
+    """Pass the suite's models, seeds and tests to run_tests at *base_url* and return its result."""
+    return run_tests(
+        models=models,
+        seeds=seeds,
+        tests=tests,
+        base_url=base_url,
+    )
+
+if __name__ == "__main__":
+    main()