Mul test, validation works, but printing it doesn't
This commit is contained in:
@@ -2,12 +2,13 @@ from libs.test_class import Test
|
||||
from libs.run_tests import run_tests
|
||||
from libs.runnables import *
|
||||
from libs.validators import *
|
||||
from libs.tools import *
|
||||
|
||||
from pprint import pprint
|
||||
|
||||
def main():
|
||||
models = [
|
||||
# "llama3.1", # 8b
|
||||
"llama3.1", # 8b
|
||||
# "llama3.1:70b",
|
||||
# "llama3-groq-tool-use", # latest
|
||||
# "llama3-groq-tool-use:70b",
|
||||
@@ -15,29 +16,45 @@ def main():
|
||||
# "mixtral:8x22b",
|
||||
# "gemma2:2b",
|
||||
# "phi3", # 3.8b
|
||||
"tinyllama:1.1b",
|
||||
# "tinyllama:1.1b",
|
||||
]
|
||||
seeds = [
|
||||
# 2,
|
||||
222,
|
||||
# 22222,
|
||||
2222222
|
||||
22222,
|
||||
# 2222222
|
||||
]
|
||||
tests = [
|
||||
Test(
|
||||
name="Chinese Fruit",
|
||||
system_msg="You are a helpful assistant. You serve people across the globe. You can be a freind, but stay professional.",
|
||||
human_msg="什么蔬菜最健康?",
|
||||
validation_info="""- in Mandarin Chinese
|
||||
- factually correct
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)""",
|
||||
runnable=basic,
|
||||
validator=system_human_answer_match
|
||||
runnable=basic,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant. You serve people across the globe.",
|
||||
"human_msg": "什么蔬菜最健康?",
|
||||
},
|
||||
validator=system_human_answer_match,
|
||||
validation_input={
|
||||
"criteria": """- in Mandarin Chinese
|
||||
- factually correct
|
||||
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)""",
|
||||
}
|
||||
),
|
||||
Test(
|
||||
name="Simple Multiplication",
|
||||
runnable=one_tool_call_answer,
|
||||
runnable_input={
|
||||
"system_msg": "You are a helpful assistant.",
|
||||
"human_msg": "What is 234215 times 143243?",
|
||||
"tools": {
|
||||
"add": add,
|
||||
"multiply": multiply
|
||||
}
|
||||
},
|
||||
validator=regex_match_any,
|
||||
validation_input={
|
||||
"patterns": ["33549659245", "33,549,659,245", "33.549.659.245"]
|
||||
}
|
||||
),
|
||||
# Test(
|
||||
# name="Simple Multiplication",
|
||||
# system_msg=
|
||||
# )
|
||||
]
|
||||
|
||||
results = run_tests(
|
||||
|
||||
Reference in New Issue
Block a user