Mul test, validation works, but printing it doesn't

This commit is contained in:
Lennart J. Kurzweg (Nx2)
2024-08-05 14:09:06 +02:00
parent 52a180b936
commit abd6320ce9
6 changed files with 99 additions and 31 deletions

View File

@@ -2,12 +2,13 @@ from libs.test_class import Test
from libs.run_tests import run_tests
from libs.runnables import *
from libs.validators import *
from libs.tools import *
from pprint import pprint
def main():
models = [
# "llama3.1", # 8b
"llama3.1", # 8b
# "llama3.1:70b",
# "llama3-groq-tool-use", # latest
# "llama3-groq-tool-use:70b",
@@ -15,29 +16,45 @@ def main():
# "mixtral:8x22b",
# "gemma2:2b",
# "phi3", # 3.8b
"tinyllama:1.1b",
# "tinyllama:1.1b",
]
seeds = [
# 2,
222,
# 22222,
2222222
22222,
# 2222222
]
tests = [
Test(
name="Chinese Fruit",
system_msg="You are a helpful assistant. You serve people across the globe. You can be a freind, but stay professional.",
human_msg="什么蔬菜最健康?",
validation_info="""- in Mandarin Chinese
- factually correct
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)""",
runnable=basic,
validator=system_human_answer_match
runnable=basic,
runnable_input={
"system_msg": "You are a helpful assistant. You serve people across the globe.",
"human_msg": "什么蔬菜最健康?",
},
validator=system_human_answer_match,
validation_input={
"criteria": """- in Mandarin Chinese
- factually correct
- just one single conversational answer, without any AI fragments (A/B versions, "end of message" parts, unfitting discalimers or notes)""",
}
),
Test(
name="Simple Multiplication",
runnable=one_tool_call_answer,
runnable_input={
"system_msg": "You are a helpful assistant.",
"human_msg": "What is 234215 times 143243?",
"tools": {
"add": add,
"multiply": multiply
}
},
validator=regex_match_any,
validation_input={
"patterns": ["33549659245", "33,549,659,245", "33.549.659.245"]
}
),
# Test(
# name="Simple Multiplication",
# system_msg=
# )
]
results = run_tests(