Merge branch 'master' of ssh://git.nx2.site:20022/nx2/test-small-llms

This commit is contained in:
Lennart J. Kurzweg (Nx2)
2024-08-28 20:46:41 +02:00
3 changed files with 130 additions and 31 deletions

View File

@@ -1,7 +1,7 @@
from libs.classes import Test
from libs.runnables import basic_prompt, one_tool_call_answer, agent_with_tools
from libs.validators import regex_match_any, system_human_answer_match
from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note, save_python_repl
from textwrap import dedent
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
@@ -49,6 +49,19 @@ tests = {
validator=regex_match_any,
validation_input={"patterns": ["6134205", "6.134.205", "6,134,205"]},
),
# Test 363: asks the model whether 31515261 is divisible by 425 and, if not,
# for the remainder, giving it a single tool registered as "python_repl".
# Expected answer: 31515261 = 425 * 74153 + 236, i.e. not divisible, remainder 236.
# NOTE(review): the name says "Multiplication" but the prompt is a
# divisibility/remainder question -- confirm the name is intended.
363: Test(
name="Complex Multiplication Python",
runnable=one_tool_call_answer,
runnable_input={
"system_msg": 'You are a helpful assistant.',
"human_msg": 'Is 31515261 divisible by 425? If not, whats the remainder?',
# Only tool offered to the model; the key is the name the tool is exposed under.
"tools": { "python_repl": save_python_repl },
},
validator=regex_match_any,
validation_input={
# Accepts the remainder as digits or spelled out in English; " ?" makes the
# space optional (e.g. "twohundred"), assuming these are used as regexes.
# NOTE(review): "236" is unanchored, so it presumably also matches inside a
# longer number -- verify against regex_match_any if false positives matter.
"patterns": [ "236", "two ?hundred and thirty ?six", "two ?hundred thirty ?six" ]
}
),
283: Test(
name="Notes from last Saturday",
runnable=agent_with_tools,
@@ -65,7 +78,7 @@ tests = {
validator=system_human_answer_match,
validation_input={
"criteria": dedent("""- containing the information that the Human should call Wolfgang
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
},
),
260: Test(
@@ -119,7 +132,6 @@ tests = {
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
},
),
# 363: Test(),
# 600: Test(),
# 221: Test(),
# 985: Test(),