mega commit
This commit is contained in:
@@ -1,21 +1,21 @@
|
||||
from libs.test_class import Test
|
||||
from libs.runnables import *
|
||||
from libs.validators import *
|
||||
from libs.tools import *
|
||||
from libs.classes import Test
|
||||
from libs.runnables import basic_prompt, one_tool_call_answer, agent_with_tools
|
||||
from libs.validators import regex_match_any, system_human_answer_match
|
||||
from libs.tools import add, multiply, get_current_date_and_time, get_notes_in_timespan, get_notes_containing, write_note
|
||||
from textwrap import dedent
|
||||
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
|
||||
|
||||
tests = {
|
||||
607: Test(
|
||||
name="Healthy Vegetables in Chinese",
|
||||
runnable=basic,
|
||||
runnable=basic_prompt,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant. You serve people across the globe.",
|
||||
"human_msg": "什么蔬菜最健康?",
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- in Mandarin Chinese from front to finnish
|
||||
"criteria": dedent("""- in Mandarin Chinese from front to finnish
|
||||
- factually correct
|
||||
- about healthy vegetables
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)
|
||||
@@ -23,7 +23,7 @@ tests = {
|
||||
Again, the message has to be entirely in Manadarin Chineese.
|
||||
That means If the answer is not in Chinese the answer is NOT correct! Only if the message in in Chinese rate as correct"""),
|
||||
}
|
||||
),
|
||||
),
|
||||
693: Test(
|
||||
name="Simple Multiplication",
|
||||
runnable=one_tool_call_answer,
|
||||
@@ -52,12 +52,12 @@ tests = {
|
||||
"multiply": multiply
|
||||
}
|
||||
},
|
||||
validator=regex_match_any,
|
||||
validator=regex_match_any,
|
||||
validation_input={
|
||||
"patterns": [ "6134205", "6.134.205", "6,134,205" ]
|
||||
}
|
||||
),
|
||||
283: Test(
|
||||
),
|
||||
283: Test(
|
||||
name="Notes from last Saturday",
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
@@ -67,16 +67,16 @@ tests = {
|
||||
"get_current_date_and_time": get_current_date_and_time,
|
||||
"get_notes_in_timespan": get_notes_in_timespan,
|
||||
"get_notes_containing": get_notes_containing,
|
||||
"Write note": write_note
|
||||
"Write note": write_note,
|
||||
}
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
}
|
||||
),
|
||||
260: Test(
|
||||
260: Test(
|
||||
name="Notes from last Saturday TSO", # time span only
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
@@ -88,15 +88,15 @@ tests = {
|
||||
"Write note": write_note
|
||||
}
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
}
|
||||
),
|
||||
),
|
||||
856: Test(
|
||||
name="Notes from last Saturday TSO FSP",
|
||||
runnable=agent_with_tools_fsp,
|
||||
runnable=agent_with_tools,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant. You can use tools to accomplish tasks. Once you've called a tool, the resulting tool_message content can be taken into consideration again. With that you can do \"multiple rounds\" of tool calling. To know the date, use the tool get_current_date_and_time.",
|
||||
"fsp_messages": [
|
||||
@@ -121,12 +121,12 @@ tests = {
|
||||
"Write note": write_note
|
||||
}
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
"criteria": dedent("""- containing the information that the Human should call Wolfgang
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes, what specific tool was used to get the answer, etc.)""")
|
||||
}
|
||||
),
|
||||
),
|
||||
# 363: Test(),
|
||||
# 600: Test(),
|
||||
# 221: Test(),
|
||||
|
||||
Reference in New Issue
Block a user