mega commit

2024-08-20 20:47:17 +02:00
parent 4860179a1c
commit a578dd26a0
13 changed files with 608 additions and 305 deletions
--- a/suite_settings/models.py
+++ b/suite_settings/models.py
@@ -1,13 +1,112 @@
-models = [
-    "llama3.1", # 8b
-    "llama3.1:70b",
-    "llama3-groq-tool-use", # latest
-    "llama3-groq-tool-use:70b",
-    # "mixtral:8x7b",
-    "mixtral:8x22b",
-    # "gemma2:2b",
-    # "phi3", # 3.8b
-    # "tinyllama:1.1b",
-    "mistral-nemo:12b",
-    "command-r-plus:104b",
-]
+from libs.classes import Model
+
+models = {  # Registry of the models under test, keyed by an arbitrary numeric id.
+    245: Model(
+        display_name="llama3.1 8b",
+        identifier="llama3.1",
+        supports_tools=True,
+        parameter_count_in_b=8,
+    ),
+    238: Model(
+        display_name="llama3.1 70b",
+        identifier="llama3.1:70b",
+        supports_tools=True,
+        parameter_count_in_b=70,
+    ),
+    120: Model(
+        display_name="llama3 groq TU 8b",
+        identifier="llama3-groq-tool-use",
+        supports_tools=True,
+        parameter_count_in_b=8,
+    ),
+    890: Model(
+        display_name="llama3 groq TU 70b",
+        identifier="llama3-groq-tool-use:70b",
+        supports_tools=True,
+        parameter_count_in_b=70,
+    ),
+    348: Model(
+        display_name="Mixtral MoE 8x7b",
+        identifier="mixtral:8x7b",
+        supports_tools=False,
+        parameter_count_in_b=13,  # NOTE(review): looks like active (not total) MoE parameters - confirm
+    ),
+    789: Model(
+        display_name="Mixtral MoE 8x22b",
+        identifier="mixtral:8x22b",
+        supports_tools=True,
+        parameter_count_in_b=39,  # NOTE(review): looks like active (not total) MoE parameters - confirm
+    ),
+    445: Model(
+        display_name="Gemma2 2b",
+        identifier="gemma2:2b",
+        supports_tools=False,
+        parameter_count_in_b=2,
+    ),
+    475: Model(
+        display_name="Gemma2 9b",
+        identifier="gemma2:9b",  # tag must match the 9b size advertised by display_name
+        supports_tools=False,
+        parameter_count_in_b=9,
+    ),
+    626: Model(
+        display_name="Gemma2 27b",
+        identifier="gemma2:27b",  # tag must match the 27b size advertised by display_name
+        supports_tools=False,
+        parameter_count_in_b=27,
+    ),
+    229: Model(
+        display_name="Phi3 3.8b",
+        identifier="phi3",
+        supports_tools=False,
+        parameter_count_in_b=3.8,
+    ),
+    903: Model(
+        display_name="Tinyllama 1.1b",
+        identifier="tinyllama:1.1b",
+        supports_tools=False,
+        parameter_count_in_b=1.1,
+    ),
+    670: Model(
+        display_name="Mistral Nemo 12b",
+        identifier="mistral-nemo:12b",
+        supports_tools=True,
+        parameter_count_in_b=12,
+    ),
+    404: Model(
+        display_name="Command R+ 104b",
+        identifier="command-r-plus:104b",
+        supports_tools=True,
+        parameter_count_in_b=104,
+    ),
+    701: Model(
+        display_name="Yi 6b",
+        identifier="yi:6b",  # tag aligned with display_name / parameter_count_in_b (6b)
+        supports_tools=False,
+        parameter_count_in_b=6,
+    ),
+    704: Model(  # NOTE(review): identical to entry 701 - duplicate, or meant to be another Yi size? confirm
+        display_name="Yi 6b",
+        identifier="yi:6b",  # tag aligned with display_name / parameter_count_in_b (6b)
+        supports_tools=False,
+        parameter_count_in_b=6,
+    ),
+    724: Model(
+        display_name="Yi 34b",
+        identifier="yi:34b",
+        supports_tools=False,
+        parameter_count_in_b=34,
+    ),
+    129: Model(  # NOTE(review): identical to entry 724 - duplicate, or meant to be another model? confirm
+        display_name="Yi 34b",
+        identifier="yi:34b",
+        supports_tools=False,
+        parameter_count_in_b=34,
+    ),
+    853: Model(
+        display_name="Qwen2 0.5b",
+        identifier="qwen2:0.5b",
+        supports_tools=False,
+        parameter_count_in_b=0.5,
+    ),
+}
--- a/suite_settings/tests.py
+++ b/suite_settings/tests.py
@@ -1,21 +1,21 @@
-from libs.test_class import Test
-from libs.runnables  import * 
-from libs.validators import * 
-from libs.tools      import *
+from libs.classes    import Test
+from libs.runnables  import basic_prompt, one_tool_call_answer, agent_with_tools
+from libs.validators import regex_match_any, system_human_answer_match 
+from libs.tools      import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
 from textwrap        import dedent
 from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage

 tests = {
 	607: Test(
 	    name="Healthy Vegetables in Chinese",
-	    runnable=basic,
+	    runnable=basic_prompt,
 	    runnable_input={
 	        "system_msg": "You are a helpful assistant. You serve people across the globe.",
 	        "human_msg": "什么蔬菜最健康？",
 	    },
-        validator=system_human_answer_match,
+  	    validator=system_human_answer_match,
 	    validation_input={
-    	    "criteria": dedent("""- in Mandarin Chinese from front to finnish
+  	    "criteria": dedent("""- in Mandarin Chinese from front to finnish
 			- factually correct
 			- about healthy vegetables
 			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
@@ -23,7 +23,7 @@ tests = {
 			Again, the message has to be entirely in Manadarin Chineese.
 			That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct"""),
 	    }
-    ),
+  	),
    693: Test(
 	    name="Simple Multiplication",
 	    runnable=one_tool_call_answer,
@@ -52,12 +52,12 @@ tests = {
 	            "multiply": multiply
 	        }
 	    },
-        validator=regex_match_any,
+    	validator=regex_match_any,
 	    validation_input={
 	        "patterns": [ "6134205", "6.134.205", "6,134,205" ]
 	    }
-    ),
-    283: Test(
+  ),
+  283: Test(
 	    name="Notes from last Saturday",
 	    runnable=agent_with_tools,
 	    runnable_input={
@@ -67,16 +67,16 @@ tests = {
 	            "get_current_date_and_time": get_current_date_and_time,
 	            "get_notes_in_timespan": get_notes_in_timespan,
 	            "get_notes_containing": get_notes_containing,
-	            "Write note": write_note
+	            "Write note": write_note,
 	        }
 	    },
-        validator=system_human_answer_match,
+	    validator=system_human_answer_match,
 	    validation_input={
-    	    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
+  		    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
 			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
 	    }
    ),
-    260: Test(
+  260: Test(
 	    name="Notes from last Saturday TSO", # time span only
 	    runnable=agent_with_tools,
 	    runnable_input={
@@ -88,15 +88,15 @@ tests = {
 	            "Write note": write_note
 	        }
 	    },
-        validator=system_human_answer_match,
+      	validator=system_human_answer_match,
 	    validation_input={
-    	    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
+  		    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
 			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
 	    }
-    ),
+	),
 	856: Test(
 	    name="Notes from last Saturday TSO FSP", 
-	    runnable=agent_with_tools_fsp,
+	    runnable=agent_with_tools,
 	    runnable_input={
 		    "system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
 	        "fsp_messages": [
@@ -121,12 +121,12 @@ tests = {
 	            "Write note": write_note
 	        }
 	    },
-        validator=system_human_answer_match,
+  	    validator=system_human_answer_match,
 	    validation_input={
-    	    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
+	  	    "criteria": dedent("""- containing the information that the Human should call Wolfgang 
 			- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
 	    }
-    ),
+  	),
 	# 363: Test(),
 	# 600: Test(),
 	# 221: Test(),