Merge branch 'master' of ssh://git.nx2.site:20022/nx2/test-small-llms
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
from libs.classes import Test
|
||||
from libs.runnables import basic_prompt, one_tool_call_answer, agent_with_tools
|
||||
from libs.validators import regex_match_any, system_human_answer_match
|
||||
from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
|
||||
from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note, save_python_repl
|
||||
from textwrap import dedent
|
||||
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
|
||||
|
||||
@@ -49,6 +49,19 @@ tests = {
|
||||
validator=regex_match_any,
|
||||
validation_input={"patterns": ["6134205", "6.134.205", "6,134,205"]},
|
||||
),
|
||||
363: Test(
|
||||
name="Complex Multiplication Python",
|
||||
runnable=one_tool_call_answer,
|
||||
runnable_input={
|
||||
"system_msg": 'You are a helpful assistant.',
|
||||
"human_msg": 'Is 31515261 divisible by 425? If not, whats the remainder?',
|
||||
"tools": { "python_repl": save_python_repl },
|
||||
},
|
||||
validator=regex_match_any,
|
||||
validation_input={
|
||||
"patterns": [ "236", "two ?hundred and thirty ?six", "two ?hundred thirty ?six" ]
|
||||
}
|
||||
),
|
||||
283: Test(
|
||||
name="Notes from last Saturday",
|
||||
runnable=agent_with_tools,
|
||||
@@ -65,7 +78,7 @@ tests = {
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
},
|
||||
),
|
||||
260: Test(
|
||||
@@ -119,7 +132,6 @@ tests = {
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
},
|
||||
),
|
||||
# 363: Test(),
|
||||
# 600: Test(),
|
||||
# 221: Test(),
|
||||
# 985: Test(),
|
||||
|
||||
Reference in New Issue
Block a user