Merge branch 'master' of ssh://git.nx2.site:20022/nx2/test-small-llms

2024-08-28 20:46:41 +02:00
parent 209c5850e9 085db228ad
commit 895565c3ea
3 changed files with 130 additions and 31 deletions
--- a/suite_settings/tests.py
+++ b/suite_settings/tests.py
@@ -1,7 +1,7 @@
 from libs.classes import Test
 from libs.runnables import basic_prompt, one_tool_call_answer, agent_with_tools
 from libs.validators import regex_match_any, system_human_answer_match
-from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
+from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note, save_python_repl
 from textwrap import dedent
 from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage

@@ -49,6 +49,19 @@ tests = {
        validator=regex_match_any,
        validation_input={"patterns": ["6134205", "6.134.205", "6,134,205"]},
    ),
+    363: Test(  # NOTE(review): name says "Multiplication", but the prompt below asks about divisibility/remainder - confirm intended name
+        name="Complex Multiplication Python",
+        runnable=one_tool_call_answer,
+        runnable_input={
+            "system_msg": 'You are a helpful assistant.',
+            "human_msg": 'Is 31515261 divisible by 425? If not, whats the remainder?',
+            "tools": { "python_repl": save_python_repl },  # the only tool passed in for this test
+        },
+        validator=regex_match_any,
+        validation_input={
+            "patterns": [ "236", "two ?hundred and thirty ?six", "two ?hundred thirty ?six" ]  # expected remainder: 31515261 = 425 * 74153 + 236
+        }
+    ),
    283: Test(
        name="Notes from last Saturday",
        runnable=agent_with_tools,
@@ -65,7 +78,7 @@ tests = {
        validator=system_human_answer_match,
        validation_input={
            "criteria": dedent("""- containing the information that the Human should call Wolfgang 
-    - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
+            - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
        },
    ),
    260: Test(
@@ -119,7 +132,6 @@ tests = {
            - just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
        },
    ),
-    # 363: Test(),
    # 600: Test(),
    # 221: Test(),
    # 985: Test(),